github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/command/agent/consul/syncer.go (about)

     1  // Package consul is used by Nomad to register all services both static services
     2  // and dynamic via allocations.
     3  //
     4  // Consul Service IDs have the following format: ${nomadServicePrefix}-${groupName}-${serviceKey}
     5  // groupName takes on one of the following values:
     6  // - server
     7  // - client
     8  // - executor-${alloc-id}-${task-name}
     9  //
    10  // serviceKey should be generated by service registrators.
// If the serviceKey is being generated by the executor for a Nomad
// Task.Services, the following helper should be used:
//    NOTE: Executor should interpolate the service prior to calling
//    func GenerateServiceKey(service *structs.Service) ServiceKey
    15  //
    16  // The Nomad Client reaps services registered from dead allocations that were
    17  // not properly cleaned up by the executor (this is not the expected case).
    18  //
    19  // TODO fix this comment
    20  // The Consul ServiceIDs generated by the executor will contain the allocation
    21  // ID. Thus the client can generate the list of Consul ServiceIDs to keep by
    22  // calling the following method on all running allocations the client is aware
    23  // of:
    24  // func GenerateExecutorServiceKeyPrefixFromAlloc(allocID string) string
    25  package consul
    26  
    27  import (
    28  	"fmt"
    29  	"log"
    30  	"net"
    31  	"net/url"
    32  	"strconv"
    33  	"strings"
    34  	"sync"
    35  	"time"
    36  
    37  	consul "github.com/hashicorp/consul/api"
    38  	"github.com/hashicorp/consul/lib"
    39  	"github.com/hashicorp/go-multierror"
    40  
    41  	"github.com/hashicorp/nomad/nomad/structs"
    42  	"github.com/hashicorp/nomad/nomad/structs/config"
    43  	"github.com/hashicorp/nomad/nomad/types"
    44  )
    45  
const (
	// initialSyncBuffer is the max time an initial sync will sleep
	// before syncing.
	initialSyncBuffer = 30 * time.Second

	// initialSyncDelay is the delay before an initial sync.
	initialSyncDelay = 5 * time.Second

	// nomadServicePrefix is the first prefix that scopes all Nomad registered
	// services. generateConsulServiceID puts it at the front of every Consul
	// ServiceID it builds.
	nomadServicePrefix = "_nomad"

	// syncInterval is the periodic time interval for syncing services and
	// checks with Consul. Run uses it as the base for its timer.
	syncInterval = 5 * time.Second

	// syncJitter provides a little variance in the frequency at which
	// Syncer polls Consul. Run shortens each interval by a random stagger
	// computed from syncInterval/syncJitter.
	syncJitter = 8

	// ttlCheckBuffer is the time interval that Nomad can take to report Consul
	// the check result. createCheckReg adds it to a script check's interval
	// to obtain the TTL registered with Consul.
	ttlCheckBuffer = 31 * time.Second

	// DefaultQueryWaitDuration is the max duration the Consul Agent will
	// spend waiting for a response from a Consul Query.
	DefaultQueryWaitDuration = 2 * time.Second

	// ServiceTagHTTP is the tag assigned to HTTP services
	ServiceTagHTTP = "http"

	// ServiceTagRPC is the tag assigned to RPC services
	ServiceTagRPC = "rpc"

	// ServiceTagSerf is the tag assigned to Serf services
	ServiceTagSerf = "serf"
)
    82  
// consulServiceID and consulCheckID are the IDs registered with Consul.
// They are separate string types, so a service ID and a check ID cannot be
// interchanged without an explicit conversion.
type consulServiceID string
type consulCheckID string

// ServiceKey is the generated service key that is used to build the Consul
// ServiceID. It is the last component of the ID produced by
// generateConsulServiceID.
type ServiceKey string

// ServiceDomain is the domain of services registered by Nomad. It is the
// middle component of a Consul ServiceID and the key under which the Syncer
// groups services and checks.
type ServiceDomain string

const (
	// ClientDomain is the "client" domain listed in the package comment as
	// one of the possible group names.
	ClientDomain ServiceDomain = "client"
	// ServerDomain is the "server" domain listed in the package comment as
	// one of the possible group names.
	ServerDomain ServiceDomain = "server"
)
    98  
    99  // NewExecutorDomain returns a domain specific to the alloc ID and task
   100  func NewExecutorDomain(allocID, task string) ServiceDomain {
   101  	return ServiceDomain(fmt.Sprintf("executor-%s-%s", allocID, task))
   102  }
   103  
// Syncer allows syncing of services and checks with Consul. It holds the
// locally desired service and check registrations, grouped by ServiceDomain,
// together with the Consul client used to reconcile them with the Consul
// Agent.
type Syncer struct {
	// client is the Consul API client. consulAvailable records whether the
	// last sync attempted by Run succeeded; Run consults it so that it only
	// logs when that state flips.
	client          *consul.Client
	consulAvailable bool

	// servicesGroups and checkGroups are named groups of services and checks
	// respectively that will be flattened and reconciled with Consul when
	// SyncServices() is called. The key to the servicesGroups map is unique
	// per handler and is used to allow the Agent's services to be maintained
	// independently of the Client or Server's services.
	// groupsLock guards both maps.
	servicesGroups map[ServiceDomain]map[ServiceKey]*consul.AgentServiceRegistration
	checkGroups    map[ServiceDomain]map[ServiceKey][]*consul.AgentCheckRegistration
	groupsLock     sync.RWMutex

	// The "Consul Registry" is a collection of Consul Services and
	// Checks all guarded by the registryLock.
	registryLock sync.RWMutex

	// checkRunners are delegated Consul checks being ran by the Syncer
	checkRunners map[consulCheckID]*CheckRunner

	// addrFinder resolves a port label to a host and port; NewSyncer installs
	// a no-op default. createDelegatedCheck builds the Check that a
	// CheckRunner executes for a delegated check type.
	addrFinder           func(portLabel string) (string, int)
	createDelegatedCheck func(*structs.ServiceCheck, string) (Check, error)
	delegateChecks       map[string]struct{} // delegateChecks are the checks that the Nomad client runs and reports to Consul
	// End registryLock guarded attributes.

	// logger receives all Syncer log output.
	logger *log.Logger

	// shutdownCh is the channel handed to NewSyncer; Run calls Shutdown when
	// it becomes readable. shutdown is set by Shutdown while holding
	// shutdownLock.
	shutdownCh   chan struct{}
	shutdown     bool
	shutdownLock sync.Mutex

	// notifyShutdownCh is used to notify a Syncer it needs to shutdown.
	// This can happen because there was an explicit call to the Syncer's
	// Shutdown() method, or because the calling task signaled the
	// program is going to exit by closing its shutdownCh.
	notifyShutdownCh chan struct{}

	// periodicCallbacks is walked sequentially when the timer in Run
	// fires.
	// notifySyncCh is the channel SyncNow sends on and Run receives from.
	// periodicLock is taken by RunHandlers before it reads periodicCallbacks.
	periodicCallbacks map[string]types.PeriodicCallback
	notifySyncCh      chan struct{}
	periodicLock      sync.RWMutex
}
   148  
   149  // NewSyncer returns a new consul.Syncer
   150  func NewSyncer(consulConfig *config.ConsulConfig, shutdownCh chan struct{}, logger *log.Logger) (*Syncer, error) {
   151  	var consulClientConfig *consul.Config
   152  	var err error
   153  	consulClientConfig, err = consulConfig.ApiConfig()
   154  	if err != nil {
   155  		return nil, err
   156  	}
   157  
   158  	var consulClient *consul.Client
   159  	if consulClient, err = consul.NewClient(consulClientConfig); err != nil {
   160  		return nil, err
   161  	}
   162  	consulSyncer := Syncer{
   163  		client:            consulClient,
   164  		logger:            logger,
   165  		consulAvailable:   true,
   166  		shutdownCh:        shutdownCh,
   167  		servicesGroups:    make(map[ServiceDomain]map[ServiceKey]*consul.AgentServiceRegistration),
   168  		checkGroups:       make(map[ServiceDomain]map[ServiceKey][]*consul.AgentCheckRegistration),
   169  		checkRunners:      make(map[consulCheckID]*CheckRunner),
   170  		periodicCallbacks: make(map[string]types.PeriodicCallback),
   171  		// default noop implementation of addrFinder
   172  		addrFinder: func(string) (string, int) { return "", 0 },
   173  	}
   174  
   175  	return &consulSyncer, nil
   176  }
   177  
   178  // SetDelegatedChecks sets the checks that nomad is going to run and report the
   179  // result back to consul
   180  func (c *Syncer) SetDelegatedChecks(delegateChecks map[string]struct{}, createDelegatedCheckFn func(*structs.ServiceCheck, string) (Check, error)) *Syncer {
   181  	c.delegateChecks = delegateChecks
   182  	c.createDelegatedCheck = createDelegatedCheckFn
   183  	return c
   184  }
   185  
   186  // SetAddrFinder sets a function to find the host and port for a Service given its port label
   187  func (c *Syncer) SetAddrFinder(addrFinder func(string) (string, int)) *Syncer {
   188  	c.addrFinder = addrFinder
   189  	return c
   190  }
   191  
   192  // GenerateServiceKey should be called to generate a serviceKey based on the
   193  // Service.
   194  func GenerateServiceKey(service *structs.Service) ServiceKey {
   195  	var key string
   196  	numTags := len(service.Tags)
   197  	switch numTags {
   198  	case 0:
   199  		key = fmt.Sprintf("%s", service.Name)
   200  	default:
   201  		tags := strings.Join(service.Tags, "-")
   202  		key = fmt.Sprintf("%s-%s", service.Name, tags)
   203  	}
   204  	return ServiceKey(key)
   205  }
   206  
   207  // SetServices stores the map of Nomad Services to the provided service
   208  // domain name.
   209  func (c *Syncer) SetServices(domain ServiceDomain, services map[ServiceKey]*structs.Service) error {
   210  	var mErr multierror.Error
   211  	numServ := len(services)
   212  	registeredServices := make(map[ServiceKey]*consul.AgentServiceRegistration, numServ)
   213  	registeredChecks := make(map[ServiceKey][]*consul.AgentCheckRegistration, numServ)
   214  	for serviceKey, service := range services {
   215  		serviceReg, err := c.createService(service, domain, serviceKey)
   216  		if err != nil {
   217  			mErr.Errors = append(mErr.Errors, err)
   218  			continue
   219  		}
   220  		registeredServices[serviceKey] = serviceReg
   221  
   222  		// Register the check(s) for this service
   223  		for _, chk := range service.Checks {
   224  			// Create a Consul check registration
   225  			chkReg, err := c.createCheckReg(chk, serviceReg)
   226  			if err != nil {
   227  				mErr.Errors = append(mErr.Errors, err)
   228  				continue
   229  			}
   230  
   231  			// creating a nomad check if we have to handle this particular check type
   232  			c.registryLock.RLock()
   233  			if _, ok := c.delegateChecks[chk.Type]; ok {
   234  				_, ok := c.checkRunners[consulCheckID(chkReg.ID)]
   235  				c.registryLock.RUnlock()
   236  				if ok {
   237  					continue
   238  				}
   239  
   240  				nc, err := c.createDelegatedCheck(chk, chkReg.ID)
   241  				if err != nil {
   242  					mErr.Errors = append(mErr.Errors, err)
   243  					continue
   244  				}
   245  
   246  				cr := NewCheckRunner(nc, c.runCheck, c.logger)
   247  				c.registryLock.Lock()
   248  				// TODO type the CheckRunner
   249  				c.checkRunners[consulCheckID(nc.ID())] = cr
   250  				c.registryLock.Unlock()
   251  			} else {
   252  				c.registryLock.RUnlock()
   253  			}
   254  
   255  			registeredChecks[serviceKey] = append(registeredChecks[serviceKey], chkReg)
   256  		}
   257  	}
   258  
   259  	if len(mErr.Errors) > 0 {
   260  		return mErr.ErrorOrNil()
   261  	}
   262  
   263  	// Update the services and checks groups for this domain
   264  	c.groupsLock.Lock()
   265  
   266  	// Create map for service group if it doesn't exist
   267  	serviceKeys, ok := c.servicesGroups[domain]
   268  	if !ok {
   269  		serviceKeys = make(map[ServiceKey]*consul.AgentServiceRegistration, len(registeredServices))
   270  		c.servicesGroups[domain] = serviceKeys
   271  	}
   272  
   273  	// Remove stale services
   274  	for existingServiceKey := range serviceKeys {
   275  		if _, ok := registeredServices[existingServiceKey]; !ok {
   276  			// Exisitng service needs to be removed
   277  			delete(serviceKeys, existingServiceKey)
   278  		}
   279  	}
   280  
   281  	// Add registered services
   282  	for serviceKey, service := range registeredServices {
   283  		serviceKeys[serviceKey] = service
   284  	}
   285  
   286  	// Create map for check group if it doesn't exist
   287  	checkKeys, ok := c.checkGroups[domain]
   288  	if !ok {
   289  		checkKeys = make(map[ServiceKey][]*consul.AgentCheckRegistration, len(registeredChecks))
   290  		c.checkGroups[domain] = checkKeys
   291  	}
   292  
   293  	// Remove stale checks
   294  	for existingCheckKey := range checkKeys {
   295  		if _, ok := registeredChecks[existingCheckKey]; !ok {
   296  			// Exisitng check needs to be removed
   297  			delete(checkKeys, existingCheckKey)
   298  		}
   299  	}
   300  
   301  	// Add registered checks
   302  	for checkKey, checks := range registeredChecks {
   303  		checkKeys[checkKey] = checks
   304  	}
   305  	c.groupsLock.Unlock()
   306  
   307  	// Sync immediately
   308  	c.SyncNow()
   309  
   310  	return nil
   311  }
   312  
// SyncNow notifies the Run loop, through notifySyncCh, that a sync has been
// requested.
//
// The send is non-blocking: when nothing can accept the value (no receiver
// is ready and the channel has no free buffer slot, or the channel is nil)
// the request is dropped instead of stalling the caller.
//
// NOTE(review): the previous comment said this expires the current timer and
// forces an immediate sync; confirm that against how Run handles
// notifySyncCh.
func (c *Syncer) SyncNow() {
	select {
	case c.notifySyncCh <- struct{}{}:
	default:
	}
}
   321  
   322  // flattenedServices returns a flattened list of services that are registered
   323  // locally
   324  func (c *Syncer) flattenedServices() []*consul.AgentServiceRegistration {
   325  	const initialNumServices = 8
   326  	services := make([]*consul.AgentServiceRegistration, 0, initialNumServices)
   327  	c.groupsLock.RLock()
   328  	defer c.groupsLock.RUnlock()
   329  	for _, servicesGroup := range c.servicesGroups {
   330  		for _, service := range servicesGroup {
   331  			services = append(services, service)
   332  		}
   333  	}
   334  	return services
   335  }
   336  
   337  // flattenedChecks returns a flattened list of checks that are registered
   338  // locally
   339  func (c *Syncer) flattenedChecks() []*consul.AgentCheckRegistration {
   340  	const initialNumChecks = 8
   341  	checks := make([]*consul.AgentCheckRegistration, 0, initialNumChecks)
   342  	c.groupsLock.RLock()
   343  	for _, checkGroup := range c.checkGroups {
   344  		for _, check := range checkGroup {
   345  			checks = append(checks, check...)
   346  		}
   347  	}
   348  	c.groupsLock.RUnlock()
   349  	return checks
   350  }
   351  
// signalShutdown performs a non-blocking send on notifyShutdownCh, the
// channel Run selects on to stop its loop. When nothing can accept the value
// the signal is dropped rather than blocking the caller.
func (c *Syncer) signalShutdown() {
	select {
	case c.notifyShutdownCh <- struct{}{}:
	default:
	}
}
   358  
   359  // Shutdown de-registers the services and checks and shuts down periodic syncing
   360  func (c *Syncer) Shutdown() error {
   361  	var mErr multierror.Error
   362  
   363  	c.shutdownLock.Lock()
   364  	if !c.shutdown {
   365  		c.shutdown = true
   366  	}
   367  	c.shutdownLock.Unlock()
   368  
   369  	c.signalShutdown()
   370  
   371  	// Stop all the checks that nomad is running
   372  	c.registryLock.RLock()
   373  	defer c.registryLock.RUnlock()
   374  	for _, cr := range c.checkRunners {
   375  		cr.Stop()
   376  	}
   377  
   378  	// De-register all the services registered by this syncer from Consul
   379  	services, err := c.queryAgentServices()
   380  	if err != nil {
   381  		mErr.Errors = append(mErr.Errors, err)
   382  	}
   383  	for serviceID := range services {
   384  		convertedID := string(serviceID)
   385  		if err := c.client.Agent().ServiceDeregister(convertedID); err != nil {
   386  			c.logger.Printf("[WARN] consul.syncer: failed to deregister service ID %+q: %v", convertedID, err)
   387  			mErr.Errors = append(mErr.Errors, err)
   388  		}
   389  	}
   390  	return mErr.ErrorOrNil()
   391  }
   392  
   393  // queryChecks queries the Consul Agent for a list of Consul checks that
   394  // have been registered with this Consul Syncer.
   395  func (c *Syncer) queryChecks() (map[consulCheckID]*consul.AgentCheck, error) {
   396  	checks, err := c.client.Agent().Checks()
   397  	if err != nil {
   398  		return nil, err
   399  	}
   400  	return c.filterConsulChecks(checks), nil
   401  }
   402  
   403  // queryAgentServices queries the Consul Agent for a list of Consul services that
   404  // have been registered with this Consul Syncer.
   405  func (c *Syncer) queryAgentServices() (map[consulServiceID]*consul.AgentService, error) {
   406  	services, err := c.client.Agent().Services()
   407  	if err != nil {
   408  		return nil, err
   409  	}
   410  	return c.filterConsulServices(services), nil
   411  }
   412  
   413  // syncChecks synchronizes this Syncer's Consul Checks with the Consul Agent.
   414  func (c *Syncer) syncChecks() error {
   415  	var mErr multierror.Error
   416  	consulChecks, err := c.queryChecks()
   417  	if err != nil {
   418  		return err
   419  	}
   420  
   421  	// Synchronize checks with Consul
   422  	missingChecks, existingChecks, changedChecks, staleChecks := c.calcChecksDiff(consulChecks)
   423  	for _, check := range missingChecks {
   424  		if err := c.registerCheck(check); err != nil {
   425  			mErr.Errors = append(mErr.Errors, err)
   426  		}
   427  	}
   428  	for _, check := range existingChecks {
   429  		c.ensureCheckRunning(check)
   430  	}
   431  	for _, check := range changedChecks {
   432  		// NOTE(sean@): Do we need to deregister the check before
   433  		// re-registering it?  Not deregistering to avoid missing the
   434  		// TTL but doesn't correct reconcile any possible drift with
   435  		// the check.
   436  		//
   437  		// if err := c.deregisterCheck(check.ID); err != nil {
   438  		//   mErr.Errors = append(mErr.Errors, err)
   439  		// }
   440  		if err := c.registerCheck(check); err != nil {
   441  			mErr.Errors = append(mErr.Errors, err)
   442  		}
   443  	}
   444  	for _, check := range staleChecks {
   445  		if err := c.deregisterCheck(consulCheckID(check.ID)); err != nil {
   446  			mErr.Errors = append(mErr.Errors, err)
   447  		}
   448  	}
   449  	return mErr.ErrorOrNil()
   450  }
   451  
   452  // compareConsulCheck takes a consul.AgentCheckRegistration instance and
   453  // compares it with a consul.AgentCheck.  Returns true if they are equal
   454  // according to consul.AgentCheck, otherwise false.
   455  func compareConsulCheck(localCheck *consul.AgentCheckRegistration, consulCheck *consul.AgentCheck) bool {
   456  	if consulCheck.CheckID != localCheck.ID ||
   457  		consulCheck.Name != localCheck.Name ||
   458  		consulCheck.Notes != localCheck.Notes ||
   459  		consulCheck.ServiceID != localCheck.ServiceID {
   460  		return false
   461  	}
   462  	return true
   463  }
   464  
   465  // calcChecksDiff takes the argument (consulChecks) and calculates the delta
   466  // between the consul.Syncer's list of known checks (c.flattenedChecks()).
   467  // Four arrays are returned:
   468  //
   469  // 1) a slice of checks that exist only locally in the Syncer and are missing
   470  // from the Consul Agent (consulChecks) and therefore need to be registered.
   471  //
   472  // 2) a slice of checks that exist in both the local consul.Syncer's
   473  // tracked list and Consul Agent (consulChecks).
   474  //
   475  // 3) a slice of checks that exist in both the local consul.Syncer's
   476  // tracked list and Consul Agent (consulServices) but have diverged state.
   477  //
   478  // 4) a slice of checks that exist only in the Consul Agent (consulChecks)
   479  // and should be removed because the Consul Agent has drifted from the
   480  // Syncer.
   481  func (c *Syncer) calcChecksDiff(consulChecks map[consulCheckID]*consul.AgentCheck) (
   482  	missingChecks []*consul.AgentCheckRegistration,
   483  	equalChecks []*consul.AgentCheckRegistration,
   484  	changedChecks []*consul.AgentCheckRegistration,
   485  	staleChecks []*consul.AgentCheckRegistration) {
   486  
   487  	type mergedCheck struct {
   488  		check *consul.AgentCheckRegistration
   489  		// 'l' == Nomad local only
   490  		// 'e' == equal
   491  		// 'c' == changed
   492  		// 'a' == Consul agent only
   493  		state byte
   494  	}
   495  	var (
   496  		localChecksCount   = 0
   497  		equalChecksCount   = 0
   498  		changedChecksCount = 0
   499  		agentChecks        = 0
   500  	)
   501  	flattenedChecks := c.flattenedChecks()
   502  	localChecks := make(map[string]*mergedCheck, len(flattenedChecks)+len(consulChecks))
   503  	for _, localCheck := range flattenedChecks {
   504  		localChecksCount++
   505  		localChecks[localCheck.ID] = &mergedCheck{localCheck, 'l'}
   506  	}
   507  	for _, consulCheck := range consulChecks {
   508  		if localCheck, found := localChecks[consulCheck.CheckID]; found {
   509  			localChecksCount--
   510  			if compareConsulCheck(localCheck.check, consulCheck) {
   511  				equalChecksCount++
   512  				localChecks[consulCheck.CheckID].state = 'e'
   513  			} else {
   514  				changedChecksCount++
   515  				localChecks[consulCheck.CheckID].state = 'c'
   516  			}
   517  		} else {
   518  			agentChecks++
   519  			agentCheckReg := &consul.AgentCheckRegistration{
   520  				ID:        consulCheck.CheckID,
   521  				Name:      consulCheck.Name,
   522  				Notes:     consulCheck.Notes,
   523  				ServiceID: consulCheck.ServiceID,
   524  			}
   525  			localChecks[consulCheck.CheckID] = &mergedCheck{agentCheckReg, 'a'}
   526  		}
   527  	}
   528  
   529  	missingChecks = make([]*consul.AgentCheckRegistration, 0, localChecksCount)
   530  	equalChecks = make([]*consul.AgentCheckRegistration, 0, equalChecksCount)
   531  	changedChecks = make([]*consul.AgentCheckRegistration, 0, changedChecksCount)
   532  	staleChecks = make([]*consul.AgentCheckRegistration, 0, agentChecks)
   533  	for _, check := range localChecks {
   534  		switch check.state {
   535  		case 'l':
   536  			missingChecks = append(missingChecks, check.check)
   537  		case 'e':
   538  			equalChecks = append(equalChecks, check.check)
   539  		case 'c':
   540  			changedChecks = append(changedChecks, check.check)
   541  		case 'a':
   542  			staleChecks = append(staleChecks, check.check)
   543  		}
   544  	}
   545  
   546  	return missingChecks, equalChecks, changedChecks, staleChecks
   547  }
   548  
   549  // compareConsulService takes a consul.AgentServiceRegistration instance and
   550  // compares it with a consul.AgentService.  Returns true if they are equal
   551  // according to consul.AgentService, otherwise false.
   552  func compareConsulService(localService *consul.AgentServiceRegistration, consulService *consul.AgentService) bool {
   553  	if consulService.ID != localService.ID ||
   554  		consulService.Service != localService.Name ||
   555  		consulService.Port != localService.Port ||
   556  		consulService.Address != localService.Address ||
   557  		consulService.EnableTagOverride != localService.EnableTagOverride {
   558  		return false
   559  	}
   560  
   561  	serviceTags := make(map[string]byte, len(localService.Tags))
   562  	for _, tag := range localService.Tags {
   563  		serviceTags[tag] = 'l'
   564  	}
   565  	for _, tag := range consulService.Tags {
   566  		if _, found := serviceTags[tag]; !found {
   567  			return false
   568  		}
   569  		serviceTags[tag] = 'b'
   570  	}
   571  	for _, state := range serviceTags {
   572  		if state == 'l' {
   573  			return false
   574  		}
   575  	}
   576  
   577  	return true
   578  }
   579  
   580  // calcServicesDiff takes the argument (consulServices) and calculates the
   581  // delta between the consul.Syncer's list of known services
   582  // (c.flattenedServices()).  Four arrays are returned:
   583  //
   584  // 1) a slice of services that exist only locally in the Syncer and are
   585  // missing from the Consul Agent (consulServices) and therefore need to be
   586  // registered.
   587  //
   588  // 2) a slice of services that exist in both the local consul.Syncer's
   589  // tracked list and Consul Agent (consulServices) *AND* are identical.
   590  //
   591  // 3) a slice of services that exist in both the local consul.Syncer's
   592  // tracked list and Consul Agent (consulServices) but have diverged state.
   593  //
   594  // 4) a slice of services that exist only in the Consul Agent
   595  // (consulServices) and should be removed because the Consul Agent has
   596  // drifted from the Syncer.
   597  func (c *Syncer) calcServicesDiff(consulServices map[consulServiceID]*consul.AgentService) (missingServices []*consul.AgentServiceRegistration, equalServices []*consul.AgentServiceRegistration, changedServices []*consul.AgentServiceRegistration, staleServices []*consul.AgentServiceRegistration) {
   598  	type mergedService struct {
   599  		service *consul.AgentServiceRegistration
   600  		// 'l' == Nomad local only
   601  		// 'e' == equal
   602  		// 'c' == changed
   603  		// 'a' == Consul agent only
   604  		state byte
   605  	}
   606  	var (
   607  		localServicesCount   = 0
   608  		equalServicesCount   = 0
   609  		changedServicesCount = 0
   610  		agentServices        = 0
   611  	)
   612  	flattenedServices := c.flattenedServices()
   613  	localServices := make(map[string]*mergedService, len(flattenedServices)+len(consulServices))
   614  	for _, localService := range flattenedServices {
   615  		localServicesCount++
   616  		localServices[localService.ID] = &mergedService{localService, 'l'}
   617  	}
   618  	for _, consulService := range consulServices {
   619  		if localService, found := localServices[consulService.ID]; found {
   620  			localServicesCount--
   621  			if compareConsulService(localService.service, consulService) {
   622  				equalServicesCount++
   623  				localServices[consulService.ID].state = 'e'
   624  			} else {
   625  				changedServicesCount++
   626  				localServices[consulService.ID].state = 'c'
   627  			}
   628  		} else {
   629  			agentServices++
   630  			agentServiceReg := &consul.AgentServiceRegistration{
   631  				ID:      consulService.ID,
   632  				Name:    consulService.Service,
   633  				Tags:    consulService.Tags,
   634  				Port:    consulService.Port,
   635  				Address: consulService.Address,
   636  			}
   637  			localServices[consulService.ID] = &mergedService{agentServiceReg, 'a'}
   638  		}
   639  	}
   640  
   641  	missingServices = make([]*consul.AgentServiceRegistration, 0, localServicesCount)
   642  	equalServices = make([]*consul.AgentServiceRegistration, 0, equalServicesCount)
   643  	changedServices = make([]*consul.AgentServiceRegistration, 0, changedServicesCount)
   644  	staleServices = make([]*consul.AgentServiceRegistration, 0, agentServices)
   645  	for _, service := range localServices {
   646  		switch service.state {
   647  		case 'l':
   648  			missingServices = append(missingServices, service.service)
   649  		case 'e':
   650  			equalServices = append(equalServices, service.service)
   651  		case 'c':
   652  			changedServices = append(changedServices, service.service)
   653  		case 'a':
   654  			staleServices = append(staleServices, service.service)
   655  		}
   656  	}
   657  
   658  	return missingServices, equalServices, changedServices, staleServices
   659  }
   660  
   661  // syncServices synchronizes this Syncer's Consul Services with the Consul
   662  // Agent.
   663  func (c *Syncer) syncServices() error {
   664  	consulServices, err := c.queryAgentServices()
   665  	if err != nil {
   666  		return err
   667  	}
   668  
   669  	// Synchronize services with Consul
   670  	var mErr multierror.Error
   671  	missingServices, _, changedServices, removedServices := c.calcServicesDiff(consulServices)
   672  	for _, service := range missingServices {
   673  		if err := c.client.Agent().ServiceRegister(service); err != nil {
   674  			mErr.Errors = append(mErr.Errors, err)
   675  		}
   676  	}
   677  	for _, service := range changedServices {
   678  		// Re-register the local service
   679  		if err := c.client.Agent().ServiceRegister(service); err != nil {
   680  			mErr.Errors = append(mErr.Errors, err)
   681  		}
   682  	}
   683  	for _, service := range removedServices {
   684  		if err := c.deregisterService(service.ID); err != nil {
   685  			mErr.Errors = append(mErr.Errors, err)
   686  		}
   687  	}
   688  	return mErr.ErrorOrNil()
   689  }
   690  
   691  // registerCheck registers a check definition with Consul
   692  func (c *Syncer) registerCheck(chkReg *consul.AgentCheckRegistration) error {
   693  	c.registryLock.RLock()
   694  	if cr, ok := c.checkRunners[consulCheckID(chkReg.ID)]; ok {
   695  		cr.Start()
   696  	}
   697  	c.registryLock.RUnlock()
   698  	return c.client.Agent().CheckRegister(chkReg)
   699  }
   700  
   701  // ensureCheckRunning starts the check runner for a check if it's not already running
   702  func (c *Syncer) ensureCheckRunning(chk *consul.AgentCheckRegistration) {
   703  	c.registryLock.RLock()
   704  	defer c.registryLock.RUnlock()
   705  	if cr, ok := c.checkRunners[consulCheckID(chk.ID)]; ok && !cr.Started() {
   706  		c.logger.Printf("[DEBUG] consul.syncer: starting runner for existing check. %v", chk.ID)
   707  		cr.Start()
   708  	}
   709  }
   710  
   711  // createCheckReg creates a Check that can be registered with Nomad. It also
   712  // creates a Nomad check for the check types that it can handle.
   713  func (c *Syncer) createCheckReg(check *structs.ServiceCheck, serviceReg *consul.AgentServiceRegistration) (*consul.AgentCheckRegistration, error) {
   714  	chkReg := consul.AgentCheckRegistration{
   715  		ID:        check.Hash(serviceReg.ID),
   716  		Name:      check.Name,
   717  		ServiceID: serviceReg.ID,
   718  	}
   719  	chkReg.Timeout = check.Timeout.String()
   720  	chkReg.Interval = check.Interval.String()
   721  	host, port := serviceReg.Address, serviceReg.Port
   722  	if check.PortLabel != "" {
   723  		host, port = c.addrFinder(check.PortLabel)
   724  	}
   725  	switch check.Type {
   726  	case structs.ServiceCheckHTTP:
   727  		if check.Protocol == "" {
   728  			check.Protocol = "http"
   729  		}
   730  		base := url.URL{
   731  			Scheme: check.Protocol,
   732  			Host:   net.JoinHostPort(host, strconv.Itoa(port)),
   733  		}
   734  		relative, err := url.Parse(check.Path)
   735  		if err != nil {
   736  			return nil, err
   737  		}
   738  		url := base.ResolveReference(relative)
   739  		chkReg.HTTP = url.String()
   740  	case structs.ServiceCheckTCP:
   741  		chkReg.TCP = net.JoinHostPort(host, strconv.Itoa(port))
   742  	case structs.ServiceCheckScript:
   743  		chkReg.TTL = (check.Interval + ttlCheckBuffer).String()
   744  	default:
   745  		return nil, fmt.Errorf("check type %+q not valid", check.Type)
   746  	}
   747  	chkReg.Status = check.InitialStatus
   748  	return &chkReg, nil
   749  }
   750  
   751  // generateConsulServiceID takes the domain and service key and returns a Consul
   752  // ServiceID
   753  func generateConsulServiceID(domain ServiceDomain, key ServiceKey) consulServiceID {
   754  	return consulServiceID(fmt.Sprintf("%s-%s-%s", nomadServicePrefix, domain, key))
   755  }
   756  
   757  // createService creates a Consul AgentService from a Nomad ConsulService.
   758  func (c *Syncer) createService(service *structs.Service, domain ServiceDomain, key ServiceKey) (*consul.AgentServiceRegistration, error) {
   759  	c.registryLock.RLock()
   760  	defer c.registryLock.RUnlock()
   761  
   762  	srv := consul.AgentServiceRegistration{
   763  		ID:   string(generateConsulServiceID(domain, key)),
   764  		Name: service.Name,
   765  		Tags: service.Tags,
   766  	}
   767  	host, port := c.addrFinder(service.PortLabel)
   768  	if host != "" {
   769  		srv.Address = host
   770  	}
   771  
   772  	if port != 0 {
   773  		srv.Port = port
   774  	}
   775  
   776  	return &srv, nil
   777  }
   778  
// deregisterService de-registers a service with the given ID from consul.
// It delegates directly to the Consul Agent's ServiceDeregister and returns
// its error unchanged.
func (c *Syncer) deregisterService(serviceID string) error {
	return c.client.Agent().ServiceDeregister(serviceID)
}
   783  
   784  // deregisterCheck de-registers a check from Consul
   785  func (c *Syncer) deregisterCheck(id consulCheckID) error {
   786  	c.registryLock.Lock()
   787  	defer c.registryLock.Unlock()
   788  
   789  	// Deleting from Consul Agent
   790  	if err := c.client.Agent().CheckDeregister(string(id)); err != nil {
   791  		// CheckDeregister() will be reattempted again in a future
   792  		// sync.
   793  		return err
   794  	}
   795  
   796  	// Remove the check from the local registry
   797  	if cr, ok := c.checkRunners[id]; ok {
   798  		cr.Stop()
   799  		delete(c.checkRunners, id)
   800  	}
   801  
   802  	return nil
   803  }
   804  
   805  // Run triggers periodic syncing of services and checks with Consul.  This is
   806  // a long lived go-routine which is stopped during shutdown.
   807  func (c *Syncer) Run() {
   808  	sync := time.NewTimer(0)
   809  	for {
   810  		select {
   811  		case <-sync.C:
   812  			d := syncInterval - lib.RandomStagger(syncInterval/syncJitter)
   813  			sync.Reset(d)
   814  
   815  			if err := c.SyncServices(); err != nil {
   816  				if c.consulAvailable {
   817  					c.logger.Printf("[DEBUG] consul.syncer: error in syncing: %v", err)
   818  				}
   819  				c.consulAvailable = false
   820  			} else {
   821  				if !c.consulAvailable {
   822  					c.logger.Printf("[DEBUG] consul.syncer: syncs succesful")
   823  				}
   824  				c.consulAvailable = true
   825  			}
   826  		case <-c.notifySyncCh:
   827  			sync.Reset(syncInterval)
   828  		case <-c.shutdownCh:
   829  			c.Shutdown()
   830  		case <-c.notifyShutdownCh:
   831  			sync.Stop()
   832  			c.logger.Printf("[INFO] consul.syncer: shutting down syncer ")
   833  			return
   834  		}
   835  	}
   836  }
   837  
   838  // RunHandlers executes each handler (randomly)
   839  func (c *Syncer) RunHandlers() error {
   840  	c.periodicLock.RLock()
   841  	handlers := make(map[string]types.PeriodicCallback, len(c.periodicCallbacks))
   842  	for name, fn := range c.periodicCallbacks {
   843  		handlers[name] = fn
   844  	}
   845  	c.periodicLock.RUnlock()
   846  
   847  	var mErr multierror.Error
   848  	for _, fn := range handlers {
   849  		if err := fn(); err != nil {
   850  			mErr.Errors = append(mErr.Errors, err)
   851  		}
   852  	}
   853  	return mErr.ErrorOrNil()
   854  }
   855  
   856  // SyncServices sync the services with the Consul Agent
   857  func (c *Syncer) SyncServices() error {
   858  	var mErr multierror.Error
   859  	if err := c.syncServices(); err != nil {
   860  		mErr.Errors = append(mErr.Errors, err)
   861  	}
   862  	if err := c.syncChecks(); err != nil {
   863  		mErr.Errors = append(mErr.Errors, err)
   864  	}
   865  	if err := c.RunHandlers(); err != nil {
   866  		return err
   867  	}
   868  	return mErr.ErrorOrNil()
   869  }
   870  
   871  // filterConsulServices prunes out all the service who were not registered with
   872  // the syncer
   873  func (c *Syncer) filterConsulServices(consulServices map[string]*consul.AgentService) map[consulServiceID]*consul.AgentService {
   874  	localServices := make(map[consulServiceID]*consul.AgentService, len(consulServices))
   875  	c.registryLock.RLock()
   876  	defer c.registryLock.RUnlock()
   877  	for serviceID, service := range consulServices {
   878  		for domain := range c.servicesGroups {
   879  			if strings.HasPrefix(service.ID, fmt.Sprintf("%s-%s", nomadServicePrefix, domain)) {
   880  				localServices[consulServiceID(serviceID)] = service
   881  				break
   882  			}
   883  		}
   884  	}
   885  	return localServices
   886  }
   887  
   888  // filterConsulChecks prunes out all the consul checks which do not have
   889  // services with Syncer's idPrefix.
   890  func (c *Syncer) filterConsulChecks(consulChecks map[string]*consul.AgentCheck) map[consulCheckID]*consul.AgentCheck {
   891  	localChecks := make(map[consulCheckID]*consul.AgentCheck, len(consulChecks))
   892  	c.registryLock.RLock()
   893  	defer c.registryLock.RUnlock()
   894  	for checkID, check := range consulChecks {
   895  		for domain := range c.checkGroups {
   896  			if strings.HasPrefix(check.ServiceID, fmt.Sprintf("%s-%s", nomadServicePrefix, domain)) {
   897  				localChecks[consulCheckID(checkID)] = check
   898  				break
   899  			}
   900  		}
   901  	}
   902  	return localChecks
   903  }
   904  
   905  // consulPresent indicates whether the Consul Agent is responding
   906  func (c *Syncer) consulPresent() bool {
   907  	_, err := c.client.Agent().Self()
   908  	return err == nil
   909  }
   910  
   911  // runCheck runs a check and updates the corresponding ttl check in consul
   912  func (c *Syncer) runCheck(check Check) {
   913  	res := check.Run()
   914  	if res.Duration >= check.Timeout() {
   915  		c.logger.Printf("[DEBUG] consul.syncer: check took time: %v, timeout: %v", res.Duration, check.Timeout())
   916  	}
   917  	state := consul.HealthCritical
   918  	output := res.Output
   919  	switch res.ExitCode {
   920  	case 0:
   921  		state = consul.HealthPassing
   922  	case 1:
   923  		state = consul.HealthWarning
   924  	default:
   925  		state = consul.HealthCritical
   926  	}
   927  	if res.Err != nil {
   928  		state = consul.HealthCritical
   929  		output = res.Err.Error()
   930  	}
   931  	if err := c.client.Agent().UpdateTTL(check.ID(), output, state); err != nil {
   932  		if c.consulAvailable {
   933  			c.logger.Printf("[DEBUG] consul.syncer: check %+q failed, disabling Consul checks until until next successful sync: %v", check.ID(), err)
   934  			c.consulAvailable = false
   935  		} else {
   936  			c.consulAvailable = true
   937  		}
   938  	}
   939  }
   940  
   941  // ReapUnmatched prunes all services that do not exist in the passed domains
   942  func (c *Syncer) ReapUnmatched(domains []ServiceDomain) error {
   943  	servicesInConsul, err := c.ConsulClient().Agent().Services()
   944  	if err != nil {
   945  		return err
   946  	}
   947  
   948  	var mErr multierror.Error
   949  	for serviceID := range servicesInConsul {
   950  		// Skip any service that was not registered by Nomad
   951  		if !strings.HasPrefix(serviceID, nomadServicePrefix) {
   952  			continue
   953  		}
   954  
   955  		// Filter services that do not exist in the desired domains
   956  		match := false
   957  		for _, domain := range domains {
   958  			// Include the hyphen so it is explicit to that domain otherwise it
   959  			// maybe a subset match
   960  			desired := fmt.Sprintf("%s-%s-", nomadServicePrefix, domain)
   961  			if strings.HasPrefix(serviceID, desired) {
   962  				match = true
   963  				break
   964  			}
   965  		}
   966  
   967  		if !match {
   968  			if err := c.deregisterService(serviceID); err != nil {
   969  				mErr.Errors = append(mErr.Errors, err)
   970  			}
   971  		}
   972  	}
   973  
   974  	return mErr.ErrorOrNil()
   975  }
   976  
   977  // AddPeriodicHandler adds a uniquely named callback.  Returns true if
   978  // successful, false if a handler with the same name already exists.
   979  func (c *Syncer) AddPeriodicHandler(name string, fn types.PeriodicCallback) bool {
   980  	c.periodicLock.Lock()
   981  	defer c.periodicLock.Unlock()
   982  	if _, found := c.periodicCallbacks[name]; found {
   983  		c.logger.Printf("[ERROR] consul.syncer: failed adding handler %+q", name)
   984  		return false
   985  	}
   986  	c.periodicCallbacks[name] = fn
   987  	return true
   988  }
   989  
   990  // NumHandlers returns the number of callbacks registered with the syncer
   991  func (c *Syncer) NumHandlers() int {
   992  	c.periodicLock.RLock()
   993  	defer c.periodicLock.RUnlock()
   994  	return len(c.periodicCallbacks)
   995  }
   996  
   997  // RemovePeriodicHandler removes a handler with a given name.
   998  func (c *Syncer) RemovePeriodicHandler(name string) {
   999  	c.periodicLock.Lock()
  1000  	defer c.periodicLock.Unlock()
  1001  	delete(c.periodicCallbacks, name)
  1002  }
  1003  
  1004  // ConsulClient returns the Consul client used by the Syncer.
  1005  func (c *Syncer) ConsulClient() *consul.Client {
  1006  	return c.client
  1007  }