github.com/mongey/nomad@v0.5.2/command/agent/consul/syncer.go (about)

     1  // Package consul is used by Nomad to register all services both static services
     2  // and dynamic via allocations.
     3  //
     4  // Consul Service IDs have the following format: ${nomadServicePrefix}-${groupName}-${serviceKey}
     5  // groupName takes on one of the following values:
     6  // - server
     7  // - client
     8  // - executor-${alloc-id}-${task-name}
     9  //
    10  // serviceKey should be generated by service registrators.
    11  // If the serviceKey is being generated by the executor for a Nomad Task.Services
    12  // the following helper should be used:
    13  //    NOTE: Executor should interpolate the service prior to calling
    14  //    func GenerateServiceKey(service *structs.Service) ServiceKey
    15  //
    16  // The Nomad Client reaps services registered from dead allocations that were
    17  // not properly cleaned up by the executor (this is not the expected case).
    18  //
    19  // TODO fix this comment
    20  // The Consul ServiceIDs generated by the executor will contain the allocation
    21  // ID. Thus the client can generate the list of Consul ServiceIDs to keep by
    22  // calling the following method on all running allocations the client is aware
    23  // of:
    24  // func GenerateExecutorServiceKeyPrefixFromAlloc(allocID string) string
    25  package consul
    26  
    27  import (
    28  	"fmt"
    29  	"log"
    30  	"net"
    31  	"net/url"
    32  	"strconv"
    33  	"strings"
    34  	"sync"
    35  	"time"
    36  
    37  	consul "github.com/hashicorp/consul/api"
    38  	"github.com/hashicorp/go-multierror"
    39  
    40  	"github.com/hashicorp/nomad/nomad/structs"
    41  	"github.com/hashicorp/nomad/nomad/structs/config"
    42  	"github.com/hashicorp/nomad/nomad/types"
    43  )
    44  
    45  const (
    46  	// initialSyncBuffer is the max time an initial sync will sleep
    47  	// before syncing.
    48  	initialSyncBuffer = 30 * time.Second
    49  
    50  	// initialSyncDelay is the delay before an initial sync.
    51  	initialSyncDelay = 5 * time.Second
    52  
    53  	// nomadServicePrefix is the first prefix that scopes all Nomad registered
    54  	// services
    55  	nomadServicePrefix = "_nomad"
    56  
    57  	// The periodic time interval for syncing services and checks with Consul
    58  	defaultSyncInterval = 6 * time.Second
    59  
    60  	// defaultSyncJitter provides a little variance in the frequency at which
    61  	// Syncer polls Consul.
    62  	defaultSyncJitter = time.Second
    63  
    64  	// ttlCheckBuffer is the time interval that Nomad can take to report Consul
    65  	// the check result
    66  	ttlCheckBuffer = 31 * time.Second
    67  
    68  	// DefaultQueryWaitDuration is the max duration the Consul Agent will
    69  	// spend waiting for a response from a Consul Query.
    70  	DefaultQueryWaitDuration = 2 * time.Second
    71  
    72  	// ServiceTagHTTP is the tag assigned to HTTP services
    73  	ServiceTagHTTP = "http"
    74  
    75  	// ServiceTagRPC is the tag assigned to RPC services
    76  	ServiceTagRPC = "rpc"
    77  
    78  	// ServiceTagSerf is the tag assigned to Serf services
    79  	ServiceTagSerf = "serf"
    80  )
    81  
    82  // consulServiceID and consulCheckID are the IDs registered with Consul
    83  type consulServiceID string
    84  type consulCheckID string
    85  
    86  // ServiceKey is the generated service key that is used to build the Consul
    87  // ServiceID
    88  type ServiceKey string
    89  
    90  // ServiceDomain is the domain of services registered by Nomad
    91  type ServiceDomain string
    92  
    93  const (
    94  	ClientDomain ServiceDomain = "client"
    95  	ServerDomain ServiceDomain = "server"
    96  )
    97  
    98  // NewExecutorDomain returns a domain specific to the alloc ID and task
    99  func NewExecutorDomain(allocID, task string) ServiceDomain {
   100  	return ServiceDomain(fmt.Sprintf("executor-%s-%s", allocID, task))
   101  }
   102  
   103  // Syncer allows syncing of services and checks with Consul
   104  type Syncer struct {
   105  	client          *consul.Client
   106  	consulAvailable bool
   107  
   108  	// servicesGroups and checkGroups are named groups of services and checks
   109  	// respectively that will be flattened and reconciled with Consul when
   110  	// SyncServices() is called. The key to the servicesGroups map is unique
   111  	// per handler and is used to allow the Agent's services to be maintained
   112  	// independently of the Client or Server's services.
   113  	servicesGroups map[ServiceDomain]map[ServiceKey]*consul.AgentServiceRegistration
   114  	checkGroups    map[ServiceDomain]map[ServiceKey][]*consul.AgentCheckRegistration
   115  	groupsLock     sync.RWMutex
   116  
   117  	// The "Consul Registry" is a collection of Consul Services and
   118  	// Checks all guarded by the registryLock.
   119  	registryLock sync.RWMutex
   120  
   121  	// checkRunners are delegated Consul checks being ran by the Syncer
   122  	checkRunners map[consulCheckID]*CheckRunner
   123  
   124  	addrFinder           func(portLabel string) (string, int)
   125  	createDelegatedCheck func(*structs.ServiceCheck, string) (Check, error)
   126  	delegateChecks       map[string]struct{} // delegateChecks are the checks that the Nomad client runs and reports to Consul
   127  	// End registryLock guarded attributes.
   128  
   129  	logger *log.Logger
   130  
   131  	shutdownCh   chan struct{}
   132  	shutdown     bool
   133  	shutdownLock sync.Mutex
   134  
   135  	// notifyShutdownCh is used to notify a Syncer it needs to shutdown.
   136  	// This can happen because there was an explicit call to the Syncer's
   137  	// Shutdown() method, or because the calling task signaled the
   138  	// program is going to exit by closing its shutdownCh.
   139  	notifyShutdownCh chan struct{}
   140  
   141  	// periodicCallbacks is walked sequentially when the timer in Run
   142  	// fires.
   143  	periodicCallbacks map[string]types.PeriodicCallback
   144  	notifySyncCh      chan struct{}
   145  	periodicLock      sync.RWMutex
   146  
   147  	// The periodic time interval for syncing services and checks with Consul
   148  	syncInterval time.Duration
   149  
   150  	// syncJitter provides a little variance in the frequency at which
   151  	// Syncer polls Consul.
   152  	syncJitter time.Duration
   153  }
   154  
   155  // NewSyncer returns a new consul.Syncer
   156  func NewSyncer(consulConfig *config.ConsulConfig, shutdownCh chan struct{}, logger *log.Logger) (*Syncer, error) {
   157  	var consulClientConfig *consul.Config
   158  	var err error
   159  	consulClientConfig, err = consulConfig.ApiConfig()
   160  	if err != nil {
   161  		return nil, err
   162  	}
   163  
   164  	var consulClient *consul.Client
   165  	if consulClient, err = consul.NewClient(consulClientConfig); err != nil {
   166  		return nil, err
   167  	}
   168  	consulSyncer := Syncer{
   169  		client:            consulClient,
   170  		logger:            logger,
   171  		consulAvailable:   true,
   172  		shutdownCh:        shutdownCh,
   173  		servicesGroups:    make(map[ServiceDomain]map[ServiceKey]*consul.AgentServiceRegistration),
   174  		checkGroups:       make(map[ServiceDomain]map[ServiceKey][]*consul.AgentCheckRegistration),
   175  		checkRunners:      make(map[consulCheckID]*CheckRunner),
   176  		periodicCallbacks: make(map[string]types.PeriodicCallback),
   177  		notifySyncCh:      make(chan struct{}, 1),
   178  		// default noop implementation of addrFinder
   179  		addrFinder:   func(string) (string, int) { return "", 0 },
   180  		syncInterval: defaultSyncInterval,
   181  		syncJitter:   defaultSyncJitter,
   182  	}
   183  
   184  	return &consulSyncer, nil
   185  }
   186  
   187  // SetDelegatedChecks sets the checks that nomad is going to run and report the
   188  // result back to consul
   189  func (c *Syncer) SetDelegatedChecks(delegateChecks map[string]struct{}, createDelegatedCheckFn func(*structs.ServiceCheck, string) (Check, error)) *Syncer {
   190  	c.delegateChecks = delegateChecks
   191  	c.createDelegatedCheck = createDelegatedCheckFn
   192  	return c
   193  }
   194  
   195  // SetAddrFinder sets a function to find the host and port for a Service given its port label
   196  func (c *Syncer) SetAddrFinder(addrFinder func(string) (string, int)) *Syncer {
   197  	c.addrFinder = addrFinder
   198  	return c
   199  }
   200  
   201  // GenerateServiceKey should be called to generate a serviceKey based on the
   202  // Service.
   203  func GenerateServiceKey(service *structs.Service) ServiceKey {
   204  	var key string
   205  	numTags := len(service.Tags)
   206  	switch numTags {
   207  	case 0:
   208  		key = fmt.Sprintf("%s", service.Name)
   209  	default:
   210  		tags := strings.Join(service.Tags, "-")
   211  		key = fmt.Sprintf("%s-%s", service.Name, tags)
   212  	}
   213  	return ServiceKey(key)
   214  }
   215  
   216  // SetServices stores the map of Nomad Services to the provided service
   217  // domain name.
   218  func (c *Syncer) SetServices(domain ServiceDomain, services map[ServiceKey]*structs.Service) error {
   219  	var mErr multierror.Error
   220  	numServ := len(services)
   221  	registeredServices := make(map[ServiceKey]*consul.AgentServiceRegistration, numServ)
   222  	registeredChecks := make(map[ServiceKey][]*consul.AgentCheckRegistration, numServ)
   223  	for serviceKey, service := range services {
   224  		serviceReg, err := c.createService(service, domain, serviceKey)
   225  		if err != nil {
   226  			mErr.Errors = append(mErr.Errors, err)
   227  			continue
   228  		}
   229  		registeredServices[serviceKey] = serviceReg
   230  
   231  		// Register the check(s) for this service
   232  		for _, chk := range service.Checks {
   233  			// Create a Consul check registration
   234  			chkReg, err := c.createCheckReg(chk, serviceReg)
   235  			if err != nil {
   236  				mErr.Errors = append(mErr.Errors, err)
   237  				continue
   238  			}
   239  
   240  			// creating a nomad check if we have to handle this particular check type
   241  			c.registryLock.RLock()
   242  			if _, ok := c.delegateChecks[chk.Type]; ok {
   243  				_, ok := c.checkRunners[consulCheckID(chkReg.ID)]
   244  				c.registryLock.RUnlock()
   245  				if ok {
   246  					continue
   247  				}
   248  
   249  				nc, err := c.createDelegatedCheck(chk, chkReg.ID)
   250  				if err != nil {
   251  					mErr.Errors = append(mErr.Errors, err)
   252  					continue
   253  				}
   254  
   255  				cr := NewCheckRunner(nc, c.runCheck, c.logger)
   256  				c.registryLock.Lock()
   257  				// TODO type the CheckRunner
   258  				c.checkRunners[consulCheckID(nc.ID())] = cr
   259  				c.registryLock.Unlock()
   260  			} else {
   261  				c.registryLock.RUnlock()
   262  			}
   263  
   264  			registeredChecks[serviceKey] = append(registeredChecks[serviceKey], chkReg)
   265  		}
   266  	}
   267  
   268  	if len(mErr.Errors) > 0 {
   269  		return mErr.ErrorOrNil()
   270  	}
   271  
   272  	// Update the services and checks groups for this domain
   273  	c.groupsLock.Lock()
   274  
   275  	// Create map for service group if it doesn't exist
   276  	serviceKeys, ok := c.servicesGroups[domain]
   277  	if !ok {
   278  		serviceKeys = make(map[ServiceKey]*consul.AgentServiceRegistration, len(registeredServices))
   279  		c.servicesGroups[domain] = serviceKeys
   280  	}
   281  
   282  	// Remove stale services
   283  	for existingServiceKey := range serviceKeys {
   284  		if _, ok := registeredServices[existingServiceKey]; !ok {
   285  			// Exisitng service needs to be removed
   286  			delete(serviceKeys, existingServiceKey)
   287  		}
   288  	}
   289  
   290  	// Add registered services
   291  	for serviceKey, service := range registeredServices {
   292  		serviceKeys[serviceKey] = service
   293  	}
   294  
   295  	// Create map for check group if it doesn't exist
   296  	checkKeys, ok := c.checkGroups[domain]
   297  	if !ok {
   298  		checkKeys = make(map[ServiceKey][]*consul.AgentCheckRegistration, len(registeredChecks))
   299  		c.checkGroups[domain] = checkKeys
   300  	}
   301  
   302  	// Remove stale checks
   303  	for existingCheckKey := range checkKeys {
   304  		if _, ok := registeredChecks[existingCheckKey]; !ok {
   305  			// Exisitng check needs to be removed
   306  			delete(checkKeys, existingCheckKey)
   307  		}
   308  	}
   309  
   310  	// Add registered checks
   311  	for checkKey, checks := range registeredChecks {
   312  		checkKeys[checkKey] = checks
   313  	}
   314  	c.groupsLock.Unlock()
   315  
   316  	// Sync immediately
   317  	c.SyncNow()
   318  
   319  	return nil
   320  }
   321  
   322  // SyncNow expires the current timer forcing the list of periodic callbacks
   323  // to be synced immediately.
   324  func (c *Syncer) SyncNow() {
   325  	select {
   326  	case c.notifySyncCh <- struct{}{}:
   327  	default:
   328  	}
   329  }
   330  
   331  // flattenedServices returns a flattened list of services that are registered
   332  // locally
   333  func (c *Syncer) flattenedServices() []*consul.AgentServiceRegistration {
   334  	const initialNumServices = 8
   335  	services := make([]*consul.AgentServiceRegistration, 0, initialNumServices)
   336  	c.groupsLock.RLock()
   337  	defer c.groupsLock.RUnlock()
   338  	for _, servicesGroup := range c.servicesGroups {
   339  		for _, service := range servicesGroup {
   340  			services = append(services, service)
   341  		}
   342  	}
   343  	return services
   344  }
   345  
   346  // flattenedChecks returns a flattened list of checks that are registered
   347  // locally
   348  func (c *Syncer) flattenedChecks() []*consul.AgentCheckRegistration {
   349  	const initialNumChecks = 8
   350  	checks := make([]*consul.AgentCheckRegistration, 0, initialNumChecks)
   351  	c.groupsLock.RLock()
   352  	for _, checkGroup := range c.checkGroups {
   353  		for _, check := range checkGroup {
   354  			checks = append(checks, check...)
   355  		}
   356  	}
   357  	c.groupsLock.RUnlock()
   358  	return checks
   359  }
   360  
   361  func (c *Syncer) signalShutdown() {
   362  	select {
   363  	case c.notifyShutdownCh <- struct{}{}:
   364  	default:
   365  	}
   366  }
   367  
   368  // Shutdown de-registers the services and checks and shuts down periodic syncing
   369  func (c *Syncer) Shutdown() error {
   370  	var mErr multierror.Error
   371  
   372  	c.shutdownLock.Lock()
   373  	if !c.shutdown {
   374  		c.shutdown = true
   375  	}
   376  	c.shutdownLock.Unlock()
   377  
   378  	c.signalShutdown()
   379  
   380  	// Stop all the checks that nomad is running
   381  	c.registryLock.RLock()
   382  	defer c.registryLock.RUnlock()
   383  	for _, cr := range c.checkRunners {
   384  		cr.Stop()
   385  	}
   386  
   387  	// De-register all the services registered by this syncer from Consul
   388  	services, err := c.queryAgentServices()
   389  	if err != nil {
   390  		mErr.Errors = append(mErr.Errors, err)
   391  	}
   392  	for serviceID := range services {
   393  		convertedID := string(serviceID)
   394  		if err := c.client.Agent().ServiceDeregister(convertedID); err != nil {
   395  			c.logger.Printf("[WARN] consul.syncer: failed to deregister service ID %+q: %v", convertedID, err)
   396  			mErr.Errors = append(mErr.Errors, err)
   397  		}
   398  	}
   399  	return mErr.ErrorOrNil()
   400  }
   401  
   402  // queryChecks queries the Consul Agent for a list of Consul checks that
   403  // have been registered with this Consul Syncer.
   404  func (c *Syncer) queryChecks() (map[consulCheckID]*consul.AgentCheck, error) {
   405  	checks, err := c.client.Agent().Checks()
   406  	if err != nil {
   407  		return nil, err
   408  	}
   409  	return c.filterConsulChecks(checks), nil
   410  }
   411  
   412  // queryAgentServices queries the Consul Agent for a list of Consul services that
   413  // have been registered with this Consul Syncer.
   414  func (c *Syncer) queryAgentServices() (map[consulServiceID]*consul.AgentService, error) {
   415  	services, err := c.client.Agent().Services()
   416  	if err != nil {
   417  		return nil, err
   418  	}
   419  	return c.filterConsulServices(services), nil
   420  }
   421  
   422  // syncChecks synchronizes this Syncer's Consul Checks with the Consul Agent.
   423  func (c *Syncer) syncChecks() error {
   424  	var mErr multierror.Error
   425  	consulChecks, err := c.queryChecks()
   426  	if err != nil {
   427  		return err
   428  	}
   429  
   430  	// Synchronize checks with Consul
   431  	missingChecks, existingChecks, changedChecks, staleChecks := c.calcChecksDiff(consulChecks)
   432  	for _, check := range missingChecks {
   433  		if err := c.registerCheck(check); err != nil {
   434  			mErr.Errors = append(mErr.Errors, err)
   435  		}
   436  	}
   437  	for _, check := range existingChecks {
   438  		c.ensureCheckRunning(check)
   439  	}
   440  	for _, check := range changedChecks {
   441  		// NOTE(sean@): Do we need to deregister the check before
   442  		// re-registering it?  Not deregistering to avoid missing the
   443  		// TTL but doesn't correct reconcile any possible drift with
   444  		// the check.
   445  		//
   446  		// if err := c.deregisterCheck(check.ID); err != nil {
   447  		//   mErr.Errors = append(mErr.Errors, err)
   448  		// }
   449  		if err := c.registerCheck(check); err != nil {
   450  			mErr.Errors = append(mErr.Errors, err)
   451  		}
   452  	}
   453  	for _, check := range staleChecks {
   454  		if err := c.deregisterCheck(consulCheckID(check.ID)); err != nil {
   455  			mErr.Errors = append(mErr.Errors, err)
   456  		}
   457  	}
   458  	return mErr.ErrorOrNil()
   459  }
   460  
   461  // compareConsulCheck takes a consul.AgentCheckRegistration instance and
   462  // compares it with a consul.AgentCheck.  Returns true if they are equal
   463  // according to consul.AgentCheck, otherwise false.
   464  func compareConsulCheck(localCheck *consul.AgentCheckRegistration, consulCheck *consul.AgentCheck) bool {
   465  	if consulCheck.CheckID != localCheck.ID ||
   466  		consulCheck.Name != localCheck.Name ||
   467  		consulCheck.Notes != localCheck.Notes ||
   468  		consulCheck.ServiceID != localCheck.ServiceID {
   469  		return false
   470  	}
   471  	return true
   472  }
   473  
   474  // calcChecksDiff takes the argument (consulChecks) and calculates the delta
   475  // between the consul.Syncer's list of known checks (c.flattenedChecks()).
   476  // Four arrays are returned:
   477  //
   478  // 1) a slice of checks that exist only locally in the Syncer and are missing
   479  // from the Consul Agent (consulChecks) and therefore need to be registered.
   480  //
   481  // 2) a slice of checks that exist in both the local consul.Syncer's
   482  // tracked list and Consul Agent (consulChecks).
   483  //
   484  // 3) a slice of checks that exist in both the local consul.Syncer's
   485  // tracked list and Consul Agent (consulServices) but have diverged state.
   486  //
   487  // 4) a slice of checks that exist only in the Consul Agent (consulChecks)
   488  // and should be removed because the Consul Agent has drifted from the
   489  // Syncer.
   490  func (c *Syncer) calcChecksDiff(consulChecks map[consulCheckID]*consul.AgentCheck) (
   491  	missingChecks []*consul.AgentCheckRegistration,
   492  	equalChecks []*consul.AgentCheckRegistration,
   493  	changedChecks []*consul.AgentCheckRegistration,
   494  	staleChecks []*consul.AgentCheckRegistration) {
   495  
   496  	type mergedCheck struct {
   497  		check *consul.AgentCheckRegistration
   498  		// 'l' == Nomad local only
   499  		// 'e' == equal
   500  		// 'c' == changed
   501  		// 'a' == Consul agent only
   502  		state byte
   503  	}
   504  	var (
   505  		localChecksCount   = 0
   506  		equalChecksCount   = 0
   507  		changedChecksCount = 0
   508  		agentChecks        = 0
   509  	)
   510  	flattenedChecks := c.flattenedChecks()
   511  	localChecks := make(map[string]*mergedCheck, len(flattenedChecks)+len(consulChecks))
   512  	for _, localCheck := range flattenedChecks {
   513  		localChecksCount++
   514  		localChecks[localCheck.ID] = &mergedCheck{localCheck, 'l'}
   515  	}
   516  	for _, consulCheck := range consulChecks {
   517  		if localCheck, found := localChecks[consulCheck.CheckID]; found {
   518  			localChecksCount--
   519  			if compareConsulCheck(localCheck.check, consulCheck) {
   520  				equalChecksCount++
   521  				localChecks[consulCheck.CheckID].state = 'e'
   522  			} else {
   523  				changedChecksCount++
   524  				localChecks[consulCheck.CheckID].state = 'c'
   525  			}
   526  		} else {
   527  			agentChecks++
   528  			agentCheckReg := &consul.AgentCheckRegistration{
   529  				ID:        consulCheck.CheckID,
   530  				Name:      consulCheck.Name,
   531  				Notes:     consulCheck.Notes,
   532  				ServiceID: consulCheck.ServiceID,
   533  			}
   534  			localChecks[consulCheck.CheckID] = &mergedCheck{agentCheckReg, 'a'}
   535  		}
   536  	}
   537  
   538  	missingChecks = make([]*consul.AgentCheckRegistration, 0, localChecksCount)
   539  	equalChecks = make([]*consul.AgentCheckRegistration, 0, equalChecksCount)
   540  	changedChecks = make([]*consul.AgentCheckRegistration, 0, changedChecksCount)
   541  	staleChecks = make([]*consul.AgentCheckRegistration, 0, agentChecks)
   542  	for _, check := range localChecks {
   543  		switch check.state {
   544  		case 'l':
   545  			missingChecks = append(missingChecks, check.check)
   546  		case 'e':
   547  			equalChecks = append(equalChecks, check.check)
   548  		case 'c':
   549  			changedChecks = append(changedChecks, check.check)
   550  		case 'a':
   551  			staleChecks = append(staleChecks, check.check)
   552  		}
   553  	}
   554  
   555  	return missingChecks, equalChecks, changedChecks, staleChecks
   556  }
   557  
   558  // compareConsulService takes a consul.AgentServiceRegistration instance and
   559  // compares it with a consul.AgentService.  Returns true if they are equal
   560  // according to consul.AgentService, otherwise false.
   561  func compareConsulService(localService *consul.AgentServiceRegistration, consulService *consul.AgentService) bool {
   562  	if consulService.ID != localService.ID ||
   563  		consulService.Service != localService.Name ||
   564  		consulService.Port != localService.Port ||
   565  		consulService.Address != localService.Address ||
   566  		consulService.EnableTagOverride != localService.EnableTagOverride {
   567  		return false
   568  	}
   569  
   570  	serviceTags := make(map[string]byte, len(localService.Tags))
   571  	for _, tag := range localService.Tags {
   572  		serviceTags[tag] = 'l'
   573  	}
   574  	for _, tag := range consulService.Tags {
   575  		if _, found := serviceTags[tag]; !found {
   576  			return false
   577  		}
   578  		serviceTags[tag] = 'b'
   579  	}
   580  	for _, state := range serviceTags {
   581  		if state == 'l' {
   582  			return false
   583  		}
   584  	}
   585  
   586  	return true
   587  }
   588  
   589  // calcServicesDiff takes the argument (consulServices) and calculates the
   590  // delta between the consul.Syncer's list of known services
   591  // (c.flattenedServices()).  Four arrays are returned:
   592  //
   593  // 1) a slice of services that exist only locally in the Syncer and are
   594  // missing from the Consul Agent (consulServices) and therefore need to be
   595  // registered.
   596  //
   597  // 2) a slice of services that exist in both the local consul.Syncer's
   598  // tracked list and Consul Agent (consulServices) *AND* are identical.
   599  //
   600  // 3) a slice of services that exist in both the local consul.Syncer's
   601  // tracked list and Consul Agent (consulServices) but have diverged state.
   602  //
   603  // 4) a slice of services that exist only in the Consul Agent
   604  // (consulServices) and should be removed because the Consul Agent has
   605  // drifted from the Syncer.
   606  func (c *Syncer) calcServicesDiff(consulServices map[consulServiceID]*consul.AgentService) (missingServices []*consul.AgentServiceRegistration, equalServices []*consul.AgentServiceRegistration, changedServices []*consul.AgentServiceRegistration, staleServices []*consul.AgentServiceRegistration) {
   607  	type mergedService struct {
   608  		service *consul.AgentServiceRegistration
   609  		// 'l' == Nomad local only
   610  		// 'e' == equal
   611  		// 'c' == changed
   612  		// 'a' == Consul agent only
   613  		state byte
   614  	}
   615  	var (
   616  		localServicesCount   = 0
   617  		equalServicesCount   = 0
   618  		changedServicesCount = 0
   619  		agentServices        = 0
   620  	)
   621  	flattenedServices := c.flattenedServices()
   622  	localServices := make(map[string]*mergedService, len(flattenedServices)+len(consulServices))
   623  	for _, localService := range flattenedServices {
   624  		localServicesCount++
   625  		localServices[localService.ID] = &mergedService{localService, 'l'}
   626  	}
   627  	for _, consulService := range consulServices {
   628  		if localService, found := localServices[consulService.ID]; found {
   629  			localServicesCount--
   630  			if compareConsulService(localService.service, consulService) {
   631  				equalServicesCount++
   632  				localServices[consulService.ID].state = 'e'
   633  			} else {
   634  				changedServicesCount++
   635  				localServices[consulService.ID].state = 'c'
   636  			}
   637  		} else {
   638  			agentServices++
   639  			agentServiceReg := &consul.AgentServiceRegistration{
   640  				ID:      consulService.ID,
   641  				Name:    consulService.Service,
   642  				Tags:    consulService.Tags,
   643  				Port:    consulService.Port,
   644  				Address: consulService.Address,
   645  			}
   646  			localServices[consulService.ID] = &mergedService{agentServiceReg, 'a'}
   647  		}
   648  	}
   649  
   650  	missingServices = make([]*consul.AgentServiceRegistration, 0, localServicesCount)
   651  	equalServices = make([]*consul.AgentServiceRegistration, 0, equalServicesCount)
   652  	changedServices = make([]*consul.AgentServiceRegistration, 0, changedServicesCount)
   653  	staleServices = make([]*consul.AgentServiceRegistration, 0, agentServices)
   654  	for _, service := range localServices {
   655  		switch service.state {
   656  		case 'l':
   657  			missingServices = append(missingServices, service.service)
   658  		case 'e':
   659  			equalServices = append(equalServices, service.service)
   660  		case 'c':
   661  			changedServices = append(changedServices, service.service)
   662  		case 'a':
   663  			staleServices = append(staleServices, service.service)
   664  		}
   665  	}
   666  
   667  	return missingServices, equalServices, changedServices, staleServices
   668  }
   669  
   670  // syncServices synchronizes this Syncer's Consul Services with the Consul
   671  // Agent.
   672  func (c *Syncer) syncServices() error {
   673  	consulServices, err := c.queryAgentServices()
   674  	if err != nil {
   675  		return err
   676  	}
   677  
   678  	// Synchronize services with Consul
   679  	var mErr multierror.Error
   680  	missingServices, _, changedServices, removedServices := c.calcServicesDiff(consulServices)
   681  	for _, service := range missingServices {
   682  		if err := c.client.Agent().ServiceRegister(service); err != nil {
   683  			mErr.Errors = append(mErr.Errors, err)
   684  		}
   685  	}
   686  	for _, service := range changedServices {
   687  		// Re-register the local service
   688  		if err := c.client.Agent().ServiceRegister(service); err != nil {
   689  			mErr.Errors = append(mErr.Errors, err)
   690  		}
   691  	}
   692  	for _, service := range removedServices {
   693  		if err := c.deregisterService(service.ID); err != nil {
   694  			mErr.Errors = append(mErr.Errors, err)
   695  		}
   696  	}
   697  	return mErr.ErrorOrNil()
   698  }
   699  
   700  // registerCheck registers a check definition with Consul
   701  func (c *Syncer) registerCheck(chkReg *consul.AgentCheckRegistration) error {
   702  	c.registryLock.RLock()
   703  	if cr, ok := c.checkRunners[consulCheckID(chkReg.ID)]; ok {
   704  		cr.Start()
   705  	}
   706  	c.registryLock.RUnlock()
   707  	return c.client.Agent().CheckRegister(chkReg)
   708  }
   709  
   710  // ensureCheckRunning starts the check runner for a check if it's not already running
   711  func (c *Syncer) ensureCheckRunning(chk *consul.AgentCheckRegistration) {
   712  	c.registryLock.RLock()
   713  	defer c.registryLock.RUnlock()
   714  	if cr, ok := c.checkRunners[consulCheckID(chk.ID)]; ok && !cr.Started() {
   715  		c.logger.Printf("[DEBUG] consul.syncer: starting runner for existing check. %v", chk.ID)
   716  		cr.Start()
   717  	}
   718  }
   719  
   720  // createCheckReg creates a Check that can be registered with Nomad. It also
   721  // creates a Nomad check for the check types that it can handle.
   722  func (c *Syncer) createCheckReg(check *structs.ServiceCheck, serviceReg *consul.AgentServiceRegistration) (*consul.AgentCheckRegistration, error) {
   723  	chkReg := consul.AgentCheckRegistration{
   724  		ID:        check.Hash(serviceReg.ID),
   725  		Name:      check.Name,
   726  		ServiceID: serviceReg.ID,
   727  	}
   728  	chkReg.Timeout = check.Timeout.String()
   729  	chkReg.Interval = check.Interval.String()
   730  	host, port := serviceReg.Address, serviceReg.Port
   731  	if check.PortLabel != "" {
   732  		host, port = c.addrFinder(check.PortLabel)
   733  	}
   734  	switch check.Type {
   735  	case structs.ServiceCheckHTTP:
   736  		if check.Protocol == "" {
   737  			check.Protocol = "http"
   738  		}
   739  		base := url.URL{
   740  			Scheme: check.Protocol,
   741  			Host:   net.JoinHostPort(host, strconv.Itoa(port)),
   742  		}
   743  		relative, err := url.Parse(check.Path)
   744  		if err != nil {
   745  			return nil, err
   746  		}
   747  		url := base.ResolveReference(relative)
   748  		chkReg.HTTP = url.String()
   749  	case structs.ServiceCheckTCP:
   750  		chkReg.TCP = net.JoinHostPort(host, strconv.Itoa(port))
   751  	case structs.ServiceCheckScript:
   752  		chkReg.TTL = (check.Interval + ttlCheckBuffer).String()
   753  	default:
   754  		return nil, fmt.Errorf("check type %+q not valid", check.Type)
   755  	}
   756  	chkReg.Status = check.InitialStatus
   757  	return &chkReg, nil
   758  }
   759  
   760  // generateConsulServiceID takes the domain and service key and returns a Consul
   761  // ServiceID
   762  func generateConsulServiceID(domain ServiceDomain, key ServiceKey) consulServiceID {
   763  	return consulServiceID(fmt.Sprintf("%s-%s-%s", nomadServicePrefix, domain, key))
   764  }
   765  
   766  // createService creates a Consul AgentService from a Nomad ConsulService.
   767  func (c *Syncer) createService(service *structs.Service, domain ServiceDomain, key ServiceKey) (*consul.AgentServiceRegistration, error) {
   768  	c.registryLock.RLock()
   769  	defer c.registryLock.RUnlock()
   770  
   771  	srv := consul.AgentServiceRegistration{
   772  		ID:   string(generateConsulServiceID(domain, key)),
   773  		Name: service.Name,
   774  		Tags: service.Tags,
   775  	}
   776  	host, port := c.addrFinder(service.PortLabel)
   777  	if host != "" {
   778  		srv.Address = host
   779  	}
   780  
   781  	if port != 0 {
   782  		srv.Port = port
   783  	}
   784  
   785  	return &srv, nil
   786  }
   787  
   788  // deregisterService de-registers a service with the given ID from consul
   789  func (c *Syncer) deregisterService(serviceID string) error {
   790  	return c.client.Agent().ServiceDeregister(serviceID)
   791  }
   792  
   793  // deregisterCheck de-registers a check from Consul
   794  func (c *Syncer) deregisterCheck(id consulCheckID) error {
   795  	c.registryLock.Lock()
   796  	defer c.registryLock.Unlock()
   797  
   798  	// Deleting from Consul Agent
   799  	if err := c.client.Agent().CheckDeregister(string(id)); err != nil {
   800  		// CheckDeregister() will be reattempted again in a future
   801  		// sync.
   802  		return err
   803  	}
   804  
   805  	// Remove the check from the local registry
   806  	if cr, ok := c.checkRunners[id]; ok {
   807  		cr.Stop()
   808  		delete(c.checkRunners, id)
   809  	}
   810  
   811  	return nil
   812  }
   813  
   814  // Run triggers periodic syncing of services and checks with Consul.  This is
   815  // a long lived go-routine which is stopped during shutdown.
   816  func (c *Syncer) Run() {
   817  	sync := time.NewTimer(0)
   818  	for {
   819  		select {
   820  		case <-sync.C:
   821  			d := c.syncInterval - c.syncJitter
   822  			sync.Reset(d)
   823  
   824  			if err := c.SyncServices(); err != nil {
   825  				if c.consulAvailable {
   826  					c.logger.Printf("[DEBUG] consul.syncer: error in syncing: %v", err)
   827  				}
   828  				c.consulAvailable = false
   829  			} else {
   830  				if !c.consulAvailable {
   831  					c.logger.Printf("[DEBUG] consul.syncer: syncs succesful")
   832  				}
   833  				c.consulAvailable = true
   834  			}
   835  		case <-c.notifySyncCh:
   836  			sync.Reset(0)
   837  		case <-c.shutdownCh:
   838  			c.Shutdown()
   839  		case <-c.notifyShutdownCh:
   840  			sync.Stop()
   841  			c.logger.Printf("[INFO] consul.syncer: shutting down syncer ")
   842  			return
   843  		}
   844  	}
   845  }
   846  
   847  // RunHandlers executes each handler (randomly)
   848  func (c *Syncer) RunHandlers() error {
   849  	c.periodicLock.RLock()
   850  	handlers := make(map[string]types.PeriodicCallback, len(c.periodicCallbacks))
   851  	for name, fn := range c.periodicCallbacks {
   852  		handlers[name] = fn
   853  	}
   854  	c.periodicLock.RUnlock()
   855  
   856  	var mErr multierror.Error
   857  	for _, fn := range handlers {
   858  		if err := fn(); err != nil {
   859  			mErr.Errors = append(mErr.Errors, err)
   860  		}
   861  	}
   862  	return mErr.ErrorOrNil()
   863  }
   864  
   865  // SyncServices sync the services with the Consul Agent
   866  func (c *Syncer) SyncServices() error {
   867  	var mErr multierror.Error
   868  	if err := c.syncServices(); err != nil {
   869  		mErr.Errors = append(mErr.Errors, err)
   870  	}
   871  	if err := c.syncChecks(); err != nil {
   872  		mErr.Errors = append(mErr.Errors, err)
   873  	}
   874  	if err := c.RunHandlers(); err != nil {
   875  		return err
   876  	}
   877  	return mErr.ErrorOrNil()
   878  }
   879  
   880  // filterConsulServices prunes out all the service who were not registered with
   881  // the syncer
   882  func (c *Syncer) filterConsulServices(consulServices map[string]*consul.AgentService) map[consulServiceID]*consul.AgentService {
   883  	localServices := make(map[consulServiceID]*consul.AgentService, len(consulServices))
   884  	c.groupsLock.RLock()
   885  	defer c.groupsLock.RUnlock()
   886  	for serviceID, service := range consulServices {
   887  		for domain := range c.servicesGroups {
   888  			if strings.HasPrefix(service.ID, fmt.Sprintf("%s-%s", nomadServicePrefix, domain)) {
   889  				localServices[consulServiceID(serviceID)] = service
   890  				break
   891  			}
   892  		}
   893  	}
   894  	return localServices
   895  }
   896  
   897  // filterConsulChecks prunes out all the consul checks which do not have
   898  // services with Syncer's idPrefix.
   899  func (c *Syncer) filterConsulChecks(consulChecks map[string]*consul.AgentCheck) map[consulCheckID]*consul.AgentCheck {
   900  	localChecks := make(map[consulCheckID]*consul.AgentCheck, len(consulChecks))
   901  	c.groupsLock.RLock()
   902  	defer c.groupsLock.RUnlock()
   903  	for checkID, check := range consulChecks {
   904  		for domain := range c.checkGroups {
   905  			if strings.HasPrefix(check.ServiceID, fmt.Sprintf("%s-%s", nomadServicePrefix, domain)) {
   906  				localChecks[consulCheckID(checkID)] = check
   907  				break
   908  			}
   909  		}
   910  	}
   911  	return localChecks
   912  }
   913  
   914  // consulPresent indicates whether the Consul Agent is responding
   915  func (c *Syncer) consulPresent() bool {
   916  	_, err := c.client.Agent().Self()
   917  	return err == nil
   918  }
   919  
   920  // runCheck runs a check and updates the corresponding ttl check in consul
   921  func (c *Syncer) runCheck(check Check) {
   922  	res := check.Run()
   923  	if res.Duration >= check.Timeout() {
   924  		c.logger.Printf("[DEBUG] consul.syncer: check took time: %v, timeout: %v", res.Duration, check.Timeout())
   925  	}
   926  	state := consul.HealthCritical
   927  	output := res.Output
   928  	switch res.ExitCode {
   929  	case 0:
   930  		state = consul.HealthPassing
   931  	case 1:
   932  		state = consul.HealthWarning
   933  	default:
   934  		state = consul.HealthCritical
   935  	}
   936  	if res.Err != nil {
   937  		state = consul.HealthCritical
   938  		output = res.Err.Error()
   939  	}
   940  	if err := c.client.Agent().UpdateTTL(check.ID(), output, state); err != nil {
   941  		if c.consulAvailable {
   942  			c.logger.Printf("[DEBUG] consul.syncer: check %+q failed, disabling Consul checks until until next successful sync: %v", check.ID(), err)
   943  			c.consulAvailable = false
   944  		} else {
   945  			c.consulAvailable = true
   946  		}
   947  	}
   948  }
   949  
   950  // ReapUnmatched prunes all services that do not exist in the passed domains
   951  func (c *Syncer) ReapUnmatched(domains []ServiceDomain) error {
   952  	servicesInConsul, err := c.ConsulClient().Agent().Services()
   953  	if err != nil {
   954  		return err
   955  	}
   956  
   957  	var mErr multierror.Error
   958  	for serviceID := range servicesInConsul {
   959  		// Skip any service that was not registered by Nomad
   960  		if !strings.HasPrefix(serviceID, nomadServicePrefix) {
   961  			continue
   962  		}
   963  
   964  		// Filter services that do not exist in the desired domains
   965  		match := false
   966  		for _, domain := range domains {
   967  			// Include the hyphen so it is explicit to that domain otherwise it
   968  			// maybe a subset match
   969  			desired := fmt.Sprintf("%s-%s-", nomadServicePrefix, domain)
   970  			if strings.HasPrefix(serviceID, desired) {
   971  				match = true
   972  				break
   973  			}
   974  		}
   975  
   976  		if !match {
   977  			if err := c.deregisterService(serviceID); err != nil {
   978  				mErr.Errors = append(mErr.Errors, err)
   979  			}
   980  		}
   981  	}
   982  
   983  	return mErr.ErrorOrNil()
   984  }
   985  
   986  // AddPeriodicHandler adds a uniquely named callback.  Returns true if
   987  // successful, false if a handler with the same name already exists.
   988  func (c *Syncer) AddPeriodicHandler(name string, fn types.PeriodicCallback) bool {
   989  	c.periodicLock.Lock()
   990  	defer c.periodicLock.Unlock()
   991  	if _, found := c.periodicCallbacks[name]; found {
   992  		c.logger.Printf("[ERROR] consul.syncer: failed adding handler %+q", name)
   993  		return false
   994  	}
   995  	c.periodicCallbacks[name] = fn
   996  	return true
   997  }
   998  
   999  // NumHandlers returns the number of callbacks registered with the syncer
  1000  func (c *Syncer) NumHandlers() int {
  1001  	c.periodicLock.RLock()
  1002  	defer c.periodicLock.RUnlock()
  1003  	return len(c.periodicCallbacks)
  1004  }
  1005  
  1006  // RemovePeriodicHandler removes a handler with a given name.
  1007  func (c *Syncer) RemovePeriodicHandler(name string) {
  1008  	c.periodicLock.Lock()
  1009  	defer c.periodicLock.Unlock()
  1010  	delete(c.periodicCallbacks, name)
  1011  }
  1012  
  1013  // ConsulClient returns the Consul client used by the Syncer.
  1014  func (c *Syncer) ConsulClient() *consul.Client {
  1015  	return c.client
  1016  }