github.com/hhrutter/nomad@v0.6.0-rc2.0.20170723054333-80c4b03f0705/command/agent/consul/client.go (about)

     1  package consul
     2  
     3  import (
     4  	"fmt"
     5  	"log"
     6  	"net"
     7  	"net/url"
     8  	"strconv"
     9  	"strings"
    10  	"sync"
    11  	"time"
    12  
    13  	metrics "github.com/armon/go-metrics"
    14  	"github.com/hashicorp/consul/api"
    15  	"github.com/hashicorp/nomad/client/driver"
    16  	cstructs "github.com/hashicorp/nomad/client/structs"
    17  	"github.com/hashicorp/nomad/nomad/structs"
    18  )
    19  
const (
	// nomadServicePrefix is the leading component of every service ID that
	// Nomad registers in Consul (see makeAgentServiceID and
	// makeTaskServiceID).
	nomadServicePrefix = "_nomad"

	// defaultRetryInterval is the base delay before retrying a failed sync
	// with Consul. The Run loop multiplies it by the number of consecutive
	// failures, up to defaultMaxRetryInterval.
	defaultRetryInterval = time.Second

	// defaultMaxRetryInterval is the default upper bound on the retry delay.
	defaultMaxRetryInterval = 30 * time.Second

	// ttlCheckBuffer is added to a script check's interval to compute the
	// TTL registered with Consul (see createCheckReg), giving Nomad slack to
	// report the check result before the TTL lapses.
	ttlCheckBuffer = 31 * time.Second

	// defaultShutdownWait is how long Shutdown() should block waiting for
	// enqueued operations to sync to Consul by default.
	defaultShutdownWait = time.Minute

	// DefaultQueryWaitDuration is the max duration the Consul Agent will
	// spend waiting for a response from a Consul Query.
	DefaultQueryWaitDuration = 2 * time.Second

	// ServiceTagHTTP is the tag assigned to HTTP services
	ServiceTagHTTP = "http"

	// ServiceTagRPC is the tag assigned to RPC services
	ServiceTagRPC = "rpc"

	// ServiceTagSerf is the tag assigned to Serf services
	ServiceTagSerf = "serf"
)
    53  
// CatalogAPI is the consul/api.Catalog API used by Nomad. Declaring it as an
// interface lets callers substitute their own implementation.
type CatalogAPI interface {
	// Datacenters presumably lists the datacenter names known to Consul;
	// see consul/api.Catalog for the authoritative contract.
	Datacenters() ([]string, error)
	// Service presumably looks up catalog entries for a service name,
	// optionally filtered by tag; see consul/api.Catalog.
	Service(service, tag string, q *api.QueryOptions) ([]*api.CatalogService, *api.QueryMeta, error)
}
    59  
// AgentAPI is the consul/api.Agent API used by Nomad. ServiceClient talks to
// the local Consul agent exclusively through this interface.
type AgentAPI interface {
	// Services returns the services known to the agent keyed by service ID
	// (sync() indexes the result by ID).
	Services() (map[string]*api.AgentService, error)
	// Checks returns the checks known to the agent keyed by check ID
	// (sync() and ServiceClient.Checks index the result by ID).
	Checks() (map[string]*api.AgentCheck, error)
	// CheckRegister and CheckDeregister add and remove a single check.
	CheckRegister(check *api.AgentCheckRegistration) error
	CheckDeregister(checkID string) error
	// ServiceRegister and ServiceDeregister add and remove a single service.
	ServiceRegister(service *api.AgentServiceRegistration) error
	ServiceDeregister(serviceID string) error
	// UpdateTTL presumably heartbeats a TTL check with the given output and
	// status; see consul/api.Agent for the authoritative contract.
	UpdateTTL(id, output, status string) error
}
    70  
// addrParser is usually the Task.FindHostAndPortFor method for turning a
// portLabel into an address and port. It takes a port label and returns the
// address string and numeric port, in that order.
type addrParser func(portLabel string) (string, int)
    74  
// operations are submitted to the main loop via commit() for synchronizing
// with Consul. A single value batches registrations and deregistrations so
// they are merged into the client's local state together.
type operations struct {
	// Services, checks and script checks to add to the local state.
	regServices []*api.AgentServiceRegistration
	regChecks   []*api.AgentCheckRegistration
	scripts     []*scriptCheck

	// IDs of services and checks to remove from the local state.
	deregServices []string
	deregChecks   []string
}
    85  
// ServiceClient handles task and agent service registration with Consul.
type ServiceClient struct {
	// client is the Consul agent API all registrations go through.
	client AgentAPI
	logger *log.Logger
	// retryInterval is the base delay between sync retries; the Run loop
	// scales it by the number of consecutive failures and caps the result
	// at maxRetryInterval.
	retryInterval    time.Duration
	maxRetryInterval time.Duration

	// skipVerifySupport is true if the local Consul agent supports TLSSkipVerify
	skipVerifySupport bool

	// exitCh is closed when the main Run loop exits
	exitCh chan struct{}

	// shutdownCh is closed when the client should shutdown
	shutdownCh chan struct{}

	// shutdownWait is how long Shutdown() blocks waiting for the final
	// sync() to finish. Defaults to defaultShutdownWait
	shutdownWait time.Duration

	// opCh carries batches of operations from commit() to the Run loop.
	opCh chan *operations

	// services, checks and scripts hold the locally desired state, keyed by
	// ID; merge() updates them and sync() reconciles Consul against them.
	// runningScripts holds handles for script checks started by sync().
	services       map[string]*api.AgentServiceRegistration
	checks         map[string]*api.AgentCheckRegistration
	scripts        map[string]*scriptCheck
	runningScripts map[string]*scriptHandle

	// agent services and checks record entries for the agent itself which
	// should be removed on shutdown. agentLock guards both maps and
	// serializes RegisterAgent with Shutdown.
	agentServices map[string]struct{}
	agentChecks   map[string]struct{}
	agentLock     sync.Mutex
}
   119  
   120  // NewServiceClient creates a new Consul ServiceClient from an existing Consul API
   121  // Client and logger.
   122  func NewServiceClient(consulClient AgentAPI, skipVerifySupport bool, logger *log.Logger) *ServiceClient {
   123  	return &ServiceClient{
   124  		client:            consulClient,
   125  		skipVerifySupport: skipVerifySupport,
   126  		logger:            logger,
   127  		retryInterval:     defaultRetryInterval,
   128  		maxRetryInterval:  defaultMaxRetryInterval,
   129  		exitCh:            make(chan struct{}),
   130  		shutdownCh:        make(chan struct{}),
   131  		shutdownWait:      defaultShutdownWait,
   132  		opCh:              make(chan *operations, 8),
   133  		services:          make(map[string]*api.AgentServiceRegistration),
   134  		checks:            make(map[string]*api.AgentCheckRegistration),
   135  		scripts:           make(map[string]*scriptCheck),
   136  		runningScripts:    make(map[string]*scriptHandle),
   137  		agentServices:     make(map[string]struct{}),
   138  		agentChecks:       make(map[string]struct{}),
   139  	}
   140  }
   141  
// Run the Consul main loop which retries operations against Consul. It should
// be called exactly once.
//
// Each iteration waits for a retry timer, a shutdown signal, or a new batch
// of operations, then calls sync(). Failed syncs are retried with a delay
// that grows linearly with the number of consecutive failures, capped at
// maxRetryInterval. exitCh is closed when the loop returns.
func (c *ServiceClient) Run() {
	defer close(c.exitCh)
	// Create the timer already expired and drain it so it does not fire
	// until the first Reset below.
	retryTimer := time.NewTimer(0)
	<-retryTimer.C // disabled by default
	failures := 0
	for {
		// Block until there is a reason to sync: a retry is due, shutdown
		// was requested, or new operations arrived.
		select {
		case <-retryTimer.C:
		case <-c.shutdownCh:
		case ops := <-c.opCh:
			c.merge(ops)
		}

		if err := c.sync(); err != nil {
			// Only log the first failure of a streak to avoid log spam.
			if failures == 0 {
				c.logger.Printf("[WARN] consul.sync: failed to update services in Consul: %v", err)
			}
			failures++
			if !retryTimer.Stop() {
				// Timer already expired, since the timer may
				// or may not have been read in the select{}
				// above, conditionally receive on it
				select {
				case <-retryTimer.C:
				default:
				}
			}
			// Linear backoff: retryInterval * consecutive failures, capped.
			backoff := c.retryInterval * time.Duration(failures)
			if backoff > c.maxRetryInterval {
				backoff = c.maxRetryInterval
			}
			retryTimer.Reset(backoff)
		} else {
			if failures > 0 {
				c.logger.Printf("[INFO] consul.sync: successfully updated services in Consul")
				failures = 0
			}
		}

		// Non-blocking shutdown check after every sync attempt.
		select {
		case <-c.shutdownCh:
			// Exit only after sync'ing all outstanding operations
			if len(c.opCh) > 0 {
				// Drain everything queued so far, then loop once more so
				// the merged state gets a sync attempt before exiting.
				for len(c.opCh) > 0 {
					c.merge(<-c.opCh)
				}
				continue
			}
			return
		default:
		}

	}
}
   198  
   199  // commit operations unless already shutting down.
   200  func (c *ServiceClient) commit(ops *operations) {
   201  	select {
   202  	case c.opCh <- ops:
   203  	case <-c.shutdownCh:
   204  	}
   205  }
   206  
   207  // merge registrations into state map prior to sync'ing with Consul
   208  func (c *ServiceClient) merge(ops *operations) {
   209  	for _, s := range ops.regServices {
   210  		c.services[s.ID] = s
   211  	}
   212  	for _, check := range ops.regChecks {
   213  		c.checks[check.ID] = check
   214  	}
   215  	for _, s := range ops.scripts {
   216  		c.scripts[s.id] = s
   217  	}
   218  	for _, sid := range ops.deregServices {
   219  		delete(c.services, sid)
   220  	}
   221  	for _, cid := range ops.deregChecks {
   222  		if script, ok := c.runningScripts[cid]; ok {
   223  			script.cancel()
   224  			delete(c.scripts, cid)
   225  			delete(c.runningScripts, cid)
   226  		}
   227  		delete(c.checks, cid)
   228  	}
   229  	metrics.SetGauge([]string{"client", "consul", "services"}, float32(len(c.services)))
   230  	metrics.SetGauge([]string{"client", "consul", "checks"}, float32(len(c.checks)))
   231  	metrics.SetGauge([]string{"client", "consul", "script_checks"}, float32(len(c.runningScripts)))
   232  }
   233  
   234  // sync enqueued operations.
   235  func (c *ServiceClient) sync() error {
   236  	sreg, creg, sdereg, cdereg := 0, 0, 0, 0
   237  
   238  	consulServices, err := c.client.Services()
   239  	if err != nil {
   240  		metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
   241  		return fmt.Errorf("error querying Consul services: %v", err)
   242  	}
   243  
   244  	consulChecks, err := c.client.Checks()
   245  	if err != nil {
   246  		metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
   247  		return fmt.Errorf("error querying Consul checks: %v", err)
   248  	}
   249  
   250  	// Remove Nomad services in Consul but unknown locally
   251  	for id := range consulServices {
   252  		if _, ok := c.services[id]; ok {
   253  			// Known service, skip
   254  			continue
   255  		}
   256  		if !isNomadService(id) {
   257  			// Not managed by Nomad, skip
   258  			continue
   259  		}
   260  		// Unknown Nomad managed service; kill
   261  		if err := c.client.ServiceDeregister(id); err != nil {
   262  			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
   263  			return err
   264  		}
   265  		sdereg++
   266  		metrics.IncrCounter([]string{"client", "consul", "service_deregisrations"}, 1)
   267  	}
   268  
   269  	// Track services whose ports have changed as their checks may also
   270  	// need updating
   271  	portsChanged := make(map[string]struct{}, len(c.services))
   272  
   273  	// Add Nomad services missing from Consul
   274  	for id, locals := range c.services {
   275  		if remotes, ok := consulServices[id]; ok {
   276  			// Make sure Port and Address are stable since
   277  			// PortLabel and AddressMode aren't included in the
   278  			// service ID.
   279  			if locals.Port == remotes.Port && locals.Address == remotes.Address {
   280  				// Already exists in Consul; skip
   281  				continue
   282  			}
   283  			// Port changed, reregister it and its checks
   284  			portsChanged[id] = struct{}{}
   285  		}
   286  		if err = c.client.ServiceRegister(locals); err != nil {
   287  			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
   288  			return err
   289  		}
   290  		sreg++
   291  		metrics.IncrCounter([]string{"client", "consul", "service_regisrations"}, 1)
   292  	}
   293  
   294  	// Remove Nomad checks in Consul but unknown locally
   295  	for id, check := range consulChecks {
   296  		if _, ok := c.checks[id]; ok {
   297  			// Known check, leave it
   298  			continue
   299  		}
   300  		if !isNomadService(check.ServiceID) {
   301  			// Service not managed by Nomad, skip
   302  			continue
   303  		}
   304  		// Unknown Nomad managed check; kill
   305  		if err := c.client.CheckDeregister(id); err != nil {
   306  			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
   307  			return err
   308  		}
   309  		cdereg++
   310  		metrics.IncrCounter([]string{"client", "consul", "check_deregisrations"}, 1)
   311  	}
   312  
   313  	// Add Nomad checks missing from Consul
   314  	for id, check := range c.checks {
   315  		if check, ok := consulChecks[id]; ok {
   316  			if _, changed := portsChanged[check.ServiceID]; !changed {
   317  				// Already in Consul and ports didn't change; skipping
   318  				continue
   319  			}
   320  		}
   321  		if err := c.client.CheckRegister(check); err != nil {
   322  			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
   323  			return err
   324  		}
   325  		creg++
   326  		metrics.IncrCounter([]string{"client", "consul", "check_regisrations"}, 1)
   327  
   328  		// Handle starting scripts
   329  		if script, ok := c.scripts[id]; ok {
   330  			// If it's already running, cancel and replace
   331  			if oldScript, running := c.runningScripts[id]; running {
   332  				oldScript.cancel()
   333  			}
   334  			// Start and store the handle
   335  			c.runningScripts[id] = script.run()
   336  		}
   337  	}
   338  
   339  	c.logger.Printf("[DEBUG] consul.sync: registered %d services, %d checks; deregistered %d services, %d checks",
   340  		sreg, creg, sdereg, cdereg)
   341  	return nil
   342  }
   343  
   344  // RegisterAgent registers Nomad agents (client or server). The
   345  // Service.PortLabel should be a literal port to be parsed with SplitHostPort.
   346  // Script checks are not supported and will return an error. Registration is
   347  // asynchronous.
   348  //
   349  // Agents will be deregistered when Shutdown is called.
   350  func (c *ServiceClient) RegisterAgent(role string, services []*structs.Service) error {
   351  	ops := operations{}
   352  
   353  	for _, service := range services {
   354  		id := makeAgentServiceID(role, service)
   355  
   356  		// Unlike tasks, agents don't use port labels. Agent ports are
   357  		// stored directly in the PortLabel.
   358  		host, rawport, err := net.SplitHostPort(service.PortLabel)
   359  		if err != nil {
   360  			return fmt.Errorf("error parsing port label %q from service %q: %v", service.PortLabel, service.Name, err)
   361  		}
   362  		port, err := strconv.Atoi(rawport)
   363  		if err != nil {
   364  			return fmt.Errorf("error parsing port %q from service %q: %v", rawport, service.Name, err)
   365  		}
   366  		serviceReg := &api.AgentServiceRegistration{
   367  			ID:      id,
   368  			Name:    service.Name,
   369  			Tags:    service.Tags,
   370  			Address: host,
   371  			Port:    port,
   372  		}
   373  		ops.regServices = append(ops.regServices, serviceReg)
   374  
   375  		for _, check := range service.Checks {
   376  			checkID := makeCheckID(id, check)
   377  			if check.Type == structs.ServiceCheckScript {
   378  				return fmt.Errorf("service %q contains invalid check: agent checks do not support scripts", service.Name)
   379  			}
   380  			checkHost, checkPort := serviceReg.Address, serviceReg.Port
   381  			if check.PortLabel != "" {
   382  				// Unlike tasks, agents don't use port labels. Agent ports are
   383  				// stored directly in the PortLabel.
   384  				host, rawport, err := net.SplitHostPort(check.PortLabel)
   385  				if err != nil {
   386  					return fmt.Errorf("error parsing port label %q from check %q: %v", service.PortLabel, check.Name, err)
   387  				}
   388  				port, err := strconv.Atoi(rawport)
   389  				if err != nil {
   390  					return fmt.Errorf("error parsing port %q from check %q: %v", rawport, check.Name, err)
   391  				}
   392  				checkHost, checkPort = host, port
   393  			}
   394  			checkReg, err := createCheckReg(id, checkID, check, checkHost, checkPort)
   395  			if err != nil {
   396  				return fmt.Errorf("failed to add check %q: %v", check.Name, err)
   397  			}
   398  			ops.regChecks = append(ops.regChecks, checkReg)
   399  		}
   400  	}
   401  
   402  	// Don't bother committing agent checks if we're already shutting down
   403  	c.agentLock.Lock()
   404  	defer c.agentLock.Unlock()
   405  	select {
   406  	case <-c.shutdownCh:
   407  		return nil
   408  	default:
   409  	}
   410  
   411  	// Now add them to the registration queue
   412  	c.commit(&ops)
   413  
   414  	// Record IDs for deregistering on shutdown
   415  	for _, id := range ops.regServices {
   416  		c.agentServices[id.ID] = struct{}{}
   417  	}
   418  	for _, id := range ops.regChecks {
   419  		c.agentChecks[id.ID] = struct{}{}
   420  	}
   421  	return nil
   422  }
   423  
   424  // serviceRegs creates service registrations, check registrations, and script
   425  // checks from a service.
   426  func (c *ServiceClient) serviceRegs(ops *operations, allocID string, service *structs.Service,
   427  	task *structs.Task, exec driver.ScriptExecutor, net *cstructs.DriverNetwork) error {
   428  
   429  	id := makeTaskServiceID(allocID, task.Name, service)
   430  	addrMode := service.AddressMode
   431  	if addrMode == structs.AddressModeAuto {
   432  		if net.Advertise() {
   433  			addrMode = structs.AddressModeDriver
   434  		} else {
   435  			// No driver network or shouldn't default to driver's network
   436  			addrMode = structs.AddressModeHost
   437  		}
   438  	}
   439  	ip, port := task.Resources.Networks.Port(service.PortLabel)
   440  	if addrMode == structs.AddressModeDriver {
   441  		if net == nil {
   442  			return fmt.Errorf("service %s cannot use driver's IP because driver didn't set one", service.Name)
   443  		}
   444  		ip = net.IP
   445  		port = net.PortMap[service.PortLabel]
   446  	}
   447  	serviceReg := &api.AgentServiceRegistration{
   448  		ID:      id,
   449  		Name:    service.Name,
   450  		Tags:    make([]string, len(service.Tags)),
   451  		Address: ip,
   452  		Port:    port,
   453  	}
   454  	// copy isn't strictly necessary but can avoid bugs especially
   455  	// with tests that may reuse Tasks
   456  	copy(serviceReg.Tags, service.Tags)
   457  	ops.regServices = append(ops.regServices, serviceReg)
   458  	return c.checkRegs(ops, allocID, id, service, task, exec, net)
   459  }
   460  
   461  func (c *ServiceClient) checkRegs(ops *operations, allocID, serviceID string, service *structs.Service,
   462  	task *structs.Task, exec driver.ScriptExecutor, net *cstructs.DriverNetwork) error {
   463  
   464  	for _, check := range service.Checks {
   465  		if check.TLSSkipVerify && !c.skipVerifySupport {
   466  			c.logger.Printf("[WARN] consul.sync: skipping check %q for task %q alloc %q because Consul doesn't support tls_skip_verify. Please upgrade to Consul >= 0.7.2.",
   467  				check.Name, task.Name, allocID)
   468  			continue
   469  		}
   470  		checkID := makeCheckID(serviceID, check)
   471  		if check.Type == structs.ServiceCheckScript {
   472  			if exec == nil {
   473  				return fmt.Errorf("driver doesn't support script checks")
   474  			}
   475  			ops.scripts = append(ops.scripts, newScriptCheck(
   476  				allocID, task.Name, checkID, check, exec, c.client, c.logger, c.shutdownCh))
   477  
   478  		}
   479  
   480  		// Checks should always use the host ip:port
   481  		portLabel := check.PortLabel
   482  		if portLabel == "" {
   483  			// Default to the service's port label
   484  			portLabel = service.PortLabel
   485  		}
   486  		ip, port := task.Resources.Networks.Port(portLabel)
   487  		checkReg, err := createCheckReg(serviceID, checkID, check, ip, port)
   488  		if err != nil {
   489  			return fmt.Errorf("failed to add check %q: %v", check.Name, err)
   490  		}
   491  		ops.regChecks = append(ops.regChecks, checkReg)
   492  	}
   493  	return nil
   494  }
   495  
   496  // RegisterTask with Consul. Adds all sevice entries and checks to Consul. If
   497  // exec is nil and a script check exists an error is returned.
   498  //
   499  // If the service IP is set it used as the address in the service registration.
   500  // Checks will always use the IP from the Task struct (host's IP).
   501  //
   502  // Actual communication with Consul is done asynchrously (see Run).
   503  func (c *ServiceClient) RegisterTask(allocID string, task *structs.Task, exec driver.ScriptExecutor, net *cstructs.DriverNetwork) error {
   504  	ops := &operations{}
   505  	for _, service := range task.Services {
   506  		if err := c.serviceRegs(ops, allocID, service, task, exec, net); err != nil {
   507  			return err
   508  		}
   509  	}
   510  	c.commit(ops)
   511  	return nil
   512  }
   513  
   514  // UpdateTask in Consul. Does not alter the service if only checks have
   515  // changed.
   516  //
   517  // DriverNetwork must not change between invocations for the same allocation.
   518  func (c *ServiceClient) UpdateTask(allocID string, existing, newTask *structs.Task, exec driver.ScriptExecutor, net *cstructs.DriverNetwork) error {
   519  	ops := &operations{}
   520  
   521  	existingIDs := make(map[string]*structs.Service, len(existing.Services))
   522  	for _, s := range existing.Services {
   523  		existingIDs[makeTaskServiceID(allocID, existing.Name, s)] = s
   524  	}
   525  	newIDs := make(map[string]*structs.Service, len(newTask.Services))
   526  	for _, s := range newTask.Services {
   527  		newIDs[makeTaskServiceID(allocID, newTask.Name, s)] = s
   528  	}
   529  
   530  	// Loop over existing Service IDs to see if they have been removed or
   531  	// updated.
   532  	for existingID, existingSvc := range existingIDs {
   533  		newSvc, ok := newIDs[existingID]
   534  		if !ok {
   535  			// Existing sevice entry removed
   536  			ops.deregServices = append(ops.deregServices, existingID)
   537  			for _, check := range existingSvc.Checks {
   538  				ops.deregChecks = append(ops.deregChecks, makeCheckID(existingID, check))
   539  			}
   540  			continue
   541  		}
   542  
   543  		// PortLabel and AddressMode aren't included in the ID, so we
   544  		// have to compare manually.
   545  		serviceUnchanged := newSvc.PortLabel == existingSvc.PortLabel && newSvc.AddressMode == existingSvc.AddressMode
   546  		if serviceUnchanged {
   547  			// Service exists and hasn't changed, don't add it later
   548  			delete(newIDs, existingID)
   549  		}
   550  
   551  		// Check to see what checks were updated
   552  		existingChecks := make(map[string]struct{}, len(existingSvc.Checks))
   553  		for _, check := range existingSvc.Checks {
   554  			existingChecks[makeCheckID(existingID, check)] = struct{}{}
   555  		}
   556  
   557  		// Register new checks
   558  		for _, check := range newSvc.Checks {
   559  			checkID := makeCheckID(existingID, check)
   560  			if _, exists := existingChecks[checkID]; exists {
   561  				// Check exists, so don't remove it
   562  				delete(existingChecks, checkID)
   563  			} else if serviceUnchanged {
   564  				// New check on an unchanged service; add them now
   565  				err := c.checkRegs(ops, allocID, existingID, newSvc, newTask, exec, net)
   566  				if err != nil {
   567  					return err
   568  				}
   569  			}
   570  		}
   571  
   572  		// Remove existing checks not in updated service
   573  		for cid := range existingChecks {
   574  			ops.deregChecks = append(ops.deregChecks, cid)
   575  		}
   576  	}
   577  
   578  	// Any remaining services should just be enqueued directly
   579  	for _, newSvc := range newIDs {
   580  		err := c.serviceRegs(ops, allocID, newSvc, newTask, exec, net)
   581  		if err != nil {
   582  			return err
   583  		}
   584  	}
   585  
   586  	c.commit(ops)
   587  	return nil
   588  }
   589  
   590  // RemoveTask from Consul. Removes all service entries and checks.
   591  //
   592  // Actual communication with Consul is done asynchrously (see Run).
   593  func (c *ServiceClient) RemoveTask(allocID string, task *structs.Task) {
   594  	ops := operations{}
   595  
   596  	for _, service := range task.Services {
   597  		id := makeTaskServiceID(allocID, task.Name, service)
   598  		ops.deregServices = append(ops.deregServices, id)
   599  
   600  		for _, check := range service.Checks {
   601  			ops.deregChecks = append(ops.deregChecks, makeCheckID(id, check))
   602  		}
   603  	}
   604  
   605  	// Now add them to the deregistration fields; main Run loop will update
   606  	c.commit(&ops)
   607  }
   608  
   609  // Checks returns the checks registered against the agent for the given
   610  // allocation.
   611  func (c *ServiceClient) Checks(a *structs.Allocation) ([]*api.AgentCheck, error) {
   612  	tg := a.Job.LookupTaskGroup(a.TaskGroup)
   613  	if tg == nil {
   614  		return nil, fmt.Errorf("failed to find task group in alloc")
   615  	}
   616  
   617  	// Determine the checks that are relevant
   618  	relevant := make(map[string]struct{}, 4)
   619  	for _, task := range tg.Tasks {
   620  		for _, service := range task.Services {
   621  			id := makeTaskServiceID(a.ID, task.Name, service)
   622  			for _, check := range service.Checks {
   623  				relevant[makeCheckID(id, check)] = struct{}{}
   624  			}
   625  		}
   626  	}
   627  
   628  	// Query all the checks
   629  	checks, err := c.client.Checks()
   630  	if err != nil {
   631  		return nil, err
   632  	}
   633  
   634  	allocChecks := make([]*api.AgentCheck, 0, len(relevant))
   635  	for checkID := range relevant {
   636  		if check, ok := checks[checkID]; ok {
   637  			allocChecks = append(allocChecks, check)
   638  		}
   639  	}
   640  
   641  	return allocChecks, nil
   642  }
   643  
   644  // Shutdown the Consul client. Update running task registations and deregister
   645  // agent from Consul. On first call blocks up to shutdownWait before giving up
   646  // on syncing operations.
   647  func (c *ServiceClient) Shutdown() error {
   648  	// Serialize Shutdown calls with RegisterAgent to prevent leaking agent
   649  	// entries.
   650  	c.agentLock.Lock()
   651  	select {
   652  	case <-c.shutdownCh:
   653  		return nil
   654  	default:
   655  	}
   656  
   657  	// Deregister Nomad agent Consul entries before closing shutdown.
   658  	ops := operations{}
   659  	for id := range c.agentServices {
   660  		ops.deregServices = append(ops.deregServices, id)
   661  	}
   662  	for id := range c.agentChecks {
   663  		ops.deregChecks = append(ops.deregChecks, id)
   664  	}
   665  	c.commit(&ops)
   666  
   667  	// Then signal shutdown
   668  	close(c.shutdownCh)
   669  
   670  	// Safe to unlock after shutdownCh closed as RegisterAgent will check
   671  	// shutdownCh before committing.
   672  	c.agentLock.Unlock()
   673  
   674  	// Give run loop time to sync, but don't block indefinitely
   675  	deadline := time.After(c.shutdownWait)
   676  
   677  	// Wait for Run to finish any outstanding operations and exit
   678  	select {
   679  	case <-c.exitCh:
   680  	case <-deadline:
   681  		// Don't wait forever though
   682  		return fmt.Errorf("timed out waiting for Consul operations to complete")
   683  	}
   684  
   685  	// Give script checks time to exit (no need to lock as Run() has exited)
   686  	for _, h := range c.runningScripts {
   687  		select {
   688  		case <-h.wait():
   689  		case <-deadline:
   690  			return fmt.Errorf("timed out waiting for script checks to run")
   691  		}
   692  	}
   693  	return nil
   694  }
   695  
   696  // makeAgentServiceID creates a unique ID for identifying an agent service in
   697  // Consul.
   698  //
   699  // Agent service IDs are of the form:
   700  //
   701  //	{nomadServicePrefix}-{ROLE}-{Service.Name}-{Service.Tags...}
   702  //	Example Server ID: _nomad-server-nomad-serf
   703  //	Example Client ID: _nomad-client-nomad-client-http
   704  //
   705  func makeAgentServiceID(role string, service *structs.Service) string {
   706  	parts := make([]string, len(service.Tags)+3)
   707  	parts[0] = nomadServicePrefix
   708  	parts[1] = role
   709  	parts[2] = service.Name
   710  	copy(parts[3:], service.Tags)
   711  	return strings.Join(parts, "-")
   712  }
   713  
   714  // makeTaskServiceID creates a unique ID for identifying a task service in
   715  // Consul.
   716  //
   717  // Task service IDs are of the form:
   718  //
   719  //	{nomadServicePrefix}-executor-{ALLOC_ID}-{Service.Name}-{Service.Tags...}
   720  //	Example Service ID: _nomad-executor-1234-echo-http-tag1-tag2-tag3
   721  //
   722  func makeTaskServiceID(allocID, taskName string, service *structs.Service) string {
   723  	parts := make([]string, len(service.Tags)+5)
   724  	parts[0] = nomadServicePrefix
   725  	parts[1] = "executor"
   726  	parts[2] = allocID
   727  	parts[3] = taskName
   728  	parts[4] = service.Name
   729  	copy(parts[5:], service.Tags)
   730  	return strings.Join(parts, "-")
   731  }
   732  
// makeCheckID creates a unique ID for a check. The ID is whatever the check's
// Hash method returns for the owning service's ID, so it is derived from
// both the check and the service it belongs to.
func makeCheckID(serviceID string, check *structs.ServiceCheck) string {
	return check.Hash(serviceID)
}
   737  
   738  // createCheckReg creates a Check that can be registered with Consul.
   739  //
   740  // Script checks simply have a TTL set and the caller is responsible for
   741  // running the script and heartbeating.
   742  func createCheckReg(serviceID, checkID string, check *structs.ServiceCheck, host string, port int) (*api.AgentCheckRegistration, error) {
   743  	chkReg := api.AgentCheckRegistration{
   744  		ID:        checkID,
   745  		Name:      check.Name,
   746  		ServiceID: serviceID,
   747  	}
   748  	chkReg.Status = check.InitialStatus
   749  	chkReg.Timeout = check.Timeout.String()
   750  	chkReg.Interval = check.Interval.String()
   751  
   752  	switch check.Type {
   753  	case structs.ServiceCheckHTTP:
   754  		proto := check.Protocol
   755  		if proto == "" {
   756  			proto = "http"
   757  		}
   758  		if check.TLSSkipVerify {
   759  			chkReg.TLSSkipVerify = true
   760  		}
   761  		base := url.URL{
   762  			Scheme: proto,
   763  			Host:   net.JoinHostPort(host, strconv.Itoa(port)),
   764  		}
   765  		relative, err := url.Parse(check.Path)
   766  		if err != nil {
   767  			return nil, err
   768  		}
   769  		url := base.ResolveReference(relative)
   770  		chkReg.HTTP = url.String()
   771  	case structs.ServiceCheckTCP:
   772  		chkReg.TCP = net.JoinHostPort(host, strconv.Itoa(port))
   773  	case structs.ServiceCheckScript:
   774  		chkReg.TTL = (check.Interval + ttlCheckBuffer).String()
   775  	default:
   776  		return nil, fmt.Errorf("check type %+q not valid", check.Type)
   777  	}
   778  	return &chkReg, nil
   779  }
   780  
   781  // isNomadService returns true if the ID matches the pattern of a Nomad managed
   782  // service. Agent services return false as independent client and server agents
   783  // may be running on the same machine. #2827
   784  func isNomadService(id string) bool {
   785  	const prefix = nomadServicePrefix + "-executor"
   786  	return strings.HasPrefix(id, prefix)
   787  }