github.com/jrxfive/nomad@v0.6.1-0.20170802162750-1fef470e89bf/command/agent/consul/client.go (about)

     1  package consul
     2  
     3  import (
     4  	"fmt"
     5  	"log"
     6  	"net"
     7  	"net/url"
     8  	"strconv"
     9  	"strings"
    10  	"sync"
    11  	"sync/atomic"
    12  	"time"
    13  
    14  	metrics "github.com/armon/go-metrics"
    15  	"github.com/hashicorp/consul/api"
    16  	"github.com/hashicorp/nomad/client/driver"
    17  	cstructs "github.com/hashicorp/nomad/client/structs"
    18  	"github.com/hashicorp/nomad/nomad/structs"
    19  )
    20  
const (
	// nomadServicePrefix is the first prefix that scopes all Nomad registered
	// services. Both agent and task service IDs start with it.
	nomadServicePrefix = "_nomad"

	// defaultRetryInterval is how quickly to retry syncing services and
	// checks to Consul when an error occurs. Will backoff up to a max.
	defaultRetryInterval = time.Second

	// defaultMaxRetryInterval is the default max retry interval.
	defaultMaxRetryInterval = 30 * time.Second

	// ttlCheckBuffer is added to a script check's interval to form the TTL
	// registered with Consul (see createCheckReg), giving Nomad extra time
	// to report the check result to Consul.
	ttlCheckBuffer = 31 * time.Second

	// defaultShutdownWait is how long Shutdown() should block waiting for
	// enqueued operations to sync to Consul by default.
	defaultShutdownWait = time.Minute

	// DefaultQueryWaitDuration is the max duration the Consul Agent will
	// spend waiting for a response from a Consul Query.
	DefaultQueryWaitDuration = 2 * time.Second

	// ServiceTagHTTP is the tag assigned to HTTP services
	ServiceTagHTTP = "http"

	// ServiceTagRPC is the tag assigned to RPC services
	ServiceTagRPC = "rpc"

	// ServiceTagSerf is the tag assigned to Serf services
	ServiceTagSerf = "serf"
)
    54  
// CatalogAPI is the consul/api.Catalog API used by Nomad. It is declared
// here but not called anywhere within this file.
type CatalogAPI interface {
	Datacenters() ([]string, error)
	Service(service, tag string, q *api.QueryOptions) ([]*api.CatalogService, *api.QueryMeta, error)
}
    60  
// AgentAPI is the consul/api.Agent API used by Nomad. ServiceClient talks to
// the local Consul agent exclusively through this interface.
type AgentAPI interface {
	// Services and Checks list what the Consul agent currently has
	// registered; sync compares the results against local state.
	Services() (map[string]*api.AgentService, error)
	Checks() (map[string]*api.AgentCheck, error)

	// Registration and deregistration calls issued by sync and Shutdown.
	CheckRegister(check *api.AgentCheckRegistration) error
	CheckDeregister(checkID string) error
	ServiceRegister(service *api.AgentServiceRegistration) error
	ServiceDeregister(serviceID string) error

	// UpdateTTL is not called directly in this file; the client is handed
	// to newScriptCheck, which presumably uses it to heartbeat script
	// checks (confirm against the scriptCheck implementation).
	UpdateTTL(id, output, status string) error
}
    71  
// addrParser is usually the Task.FindHostAndPortFor method for turning a
// portLabel into an address and port.
//
// NOTE(review): nothing in this file references addrParser; confirm it is
// still used elsewhere in the package before relying on it.
type addrParser func(portLabel string) (string, int)
    75  
// operations are submitted to the main loop via commit() for synchronizing
// with Consul. A single operations value may carry both registrations and
// deregistrations; merge() applies the registrations first and the
// deregistrations afterwards.
type operations struct {
	// Entries to add to (or replace in) the local desired state, keyed by
	// their IDs when merged.
	regServices []*api.AgentServiceRegistration
	regChecks   []*api.AgentCheckRegistration
	scripts     []*scriptCheck

	// IDs of services and checks to drop from the local desired state.
	deregServices []string
	deregChecks   []string
}
    86  
// ServiceClient handles task and agent service registration with Consul.
//
// Callers enqueue desired changes (RegisterTask, UpdateTask, RemoveTask,
// RegisterAgent) and the Run loop merges them into local state and syncs that
// state to the Consul agent, retrying with backoff on failure.
type ServiceClient struct {
	// client is the Consul agent API; retryInterval and maxRetryInterval
	// bound the backoff used by Run when a sync fails.
	client           AgentAPI
	logger           *log.Logger
	retryInterval    time.Duration
	maxRetryInterval time.Duration

	// skipVerifySupport is true if the local Consul agent supports TLSSkipVerify
	skipVerifySupport bool

	// exitCh is closed when the main Run loop exits
	exitCh chan struct{}

	// shutdownCh is closed when the client should shutdown
	shutdownCh chan struct{}

	// shutdownWait is how long Shutdown() blocks waiting for the final
	// sync() to finish. Defaults to defaultShutdownWait
	shutdownWait time.Duration

	// opCh carries enqueued operations from commit() to the Run loop.
	opCh chan *operations

	// Desired state keyed by service/check ID. These maps are written by
	// merge() and sync(), both of which are invoked from the Run loop.
	// runningScripts holds handles for script checks that sync() started.
	services       map[string]*api.AgentServiceRegistration
	checks         map[string]*api.AgentCheckRegistration
	scripts        map[string]*scriptCheck
	runningScripts map[string]*scriptHandle

	// agent services and checks record entries for the agent itself which
	// should be removed on shutdown. agentLock also serializes
	// RegisterAgent with Shutdown.
	agentServices map[string]struct{}
	agentChecks   map[string]struct{}
	agentLock     sync.Mutex

	// seen is 1 if Consul has ever been seen; otherwise 0. Accessed with
	// atomics.
	seen int64
}
   124  
   125  // NewServiceClient creates a new Consul ServiceClient from an existing Consul API
   126  // Client and logger.
   127  func NewServiceClient(consulClient AgentAPI, skipVerifySupport bool, logger *log.Logger) *ServiceClient {
   128  	return &ServiceClient{
   129  		client:            consulClient,
   130  		skipVerifySupport: skipVerifySupport,
   131  		logger:            logger,
   132  		retryInterval:     defaultRetryInterval,
   133  		maxRetryInterval:  defaultMaxRetryInterval,
   134  		exitCh:            make(chan struct{}),
   135  		shutdownCh:        make(chan struct{}),
   136  		shutdownWait:      defaultShutdownWait,
   137  		opCh:              make(chan *operations, 8),
   138  		services:          make(map[string]*api.AgentServiceRegistration),
   139  		checks:            make(map[string]*api.AgentCheckRegistration),
   140  		scripts:           make(map[string]*scriptCheck),
   141  		runningScripts:    make(map[string]*scriptHandle),
   142  		agentServices:     make(map[string]struct{}),
   143  		agentChecks:       make(map[string]struct{}),
   144  	}
   145  }
   146  
// seen is the value markSeen stores in ServiceClient.seen and hasSeen
// compares against.
const seen = 1
   149  
   150  // markSeen marks Consul as having been seen (meaning at least one operation
   151  // has succeeded).
   152  func (c *ServiceClient) markSeen() {
   153  	atomic.StoreInt64(&c.seen, seen)
   154  }
   155  
   156  // hasSeen returns true if any Consul operation has ever succeeded. Useful to
   157  // squelch errors if Consul isn't running.
   158  func (c *ServiceClient) hasSeen() bool {
   159  	return atomic.LoadInt64(&c.seen) == seen
   160  }
   161  
// Run the Consul main loop which retries operations against Consul. It should
// be called exactly once.
//
// Each iteration waits for a trigger (a retry timer firing, shutdown being
// signalled, or a new batch of operations), then calls sync(). Failed syncs
// are retried with a linearly growing backoff capped at maxRetryInterval.
// exitCh is closed when Run returns.
func (c *ServiceClient) Run() {
	defer close(c.exitCh)
	// Create the timer already fired and drain it so it starts out idle;
	// it is only armed after a failed sync.
	retryTimer := time.NewTimer(0)
	<-retryTimer.C // disabled by default
	failures := 0
	for {
		// Block until there is a reason to sync.
		select {
		case <-retryTimer.C:
		case <-c.shutdownCh:
		case ops := <-c.opCh:
			c.merge(ops)
		}

		if err := c.sync(); err != nil {
			// Only log the first failure of a streak to avoid spamming
			// the log while Consul is unavailable.
			if failures == 0 {
				c.logger.Printf("[WARN] consul.sync: failed to update services in Consul: %v", err)
			}
			failures++
			if !retryTimer.Stop() {
				// Timer already expired, since the timer may
				// or may not have been read in the select{}
				// above, conditionally receive on it
				select {
				case <-retryTimer.C:
				default:
				}
			}
			// Linear backoff: retryInterval * consecutive failures,
			// capped at maxRetryInterval.
			backoff := c.retryInterval * time.Duration(failures)
			if backoff > c.maxRetryInterval {
				backoff = c.maxRetryInterval
			}
			retryTimer.Reset(backoff)
		} else {
			if failures > 0 {
				c.logger.Printf("[INFO] consul.sync: successfully updated services in Consul")
				failures = 0
			}
		}

		// Non-blocking shutdown check. Note that once shutdown has been
		// signalled and the queue is empty the loop exits even if the
		// sync above failed; there is no further retry.
		select {
		case <-c.shutdownCh:
			// Exit only after sync'ing all outstanding operations
			if len(c.opCh) > 0 {
				for len(c.opCh) > 0 {
					c.merge(<-c.opCh)
				}
				// Loop again so the merged operations are synced;
				// the closed shutdownCh unblocks the select above.
				continue
			}
			return
		default:
		}

	}
}
   218  
   219  // commit operations unless already shutting down.
   220  func (c *ServiceClient) commit(ops *operations) {
   221  	select {
   222  	case c.opCh <- ops:
   223  	case <-c.shutdownCh:
   224  	}
   225  }
   226  
   227  // merge registrations into state map prior to sync'ing with Consul
   228  func (c *ServiceClient) merge(ops *operations) {
   229  	for _, s := range ops.regServices {
   230  		c.services[s.ID] = s
   231  	}
   232  	for _, check := range ops.regChecks {
   233  		c.checks[check.ID] = check
   234  	}
   235  	for _, s := range ops.scripts {
   236  		c.scripts[s.id] = s
   237  	}
   238  	for _, sid := range ops.deregServices {
   239  		delete(c.services, sid)
   240  	}
   241  	for _, cid := range ops.deregChecks {
   242  		if script, ok := c.runningScripts[cid]; ok {
   243  			script.cancel()
   244  			delete(c.scripts, cid)
   245  			delete(c.runningScripts, cid)
   246  		}
   247  		delete(c.checks, cid)
   248  	}
   249  	metrics.SetGauge([]string{"client", "consul", "services"}, float32(len(c.services)))
   250  	metrics.SetGauge([]string{"client", "consul", "checks"}, float32(len(c.checks)))
   251  	metrics.SetGauge([]string{"client", "consul", "script_checks"}, float32(len(c.runningScripts)))
   252  }
   253  
// sync reconciles the local desired state (c.services, c.checks, c.scripts)
// with what the Consul agent reports. It runs in four passes:
//
//  1. deregister task services found in Consul but unknown locally
//  2. register local services missing from Consul or whose address/port changed
//  3. deregister checks found in Consul that belong to task services but are
//     unknown locally
//  4. register local checks missing from Consul (or whose service was
//     re-registered in pass 2) and start their script checks
//
// The first error aborts the sync and is returned so Run can retry later; a
// sync_failure counter is incremented for every such error. On success Consul
// is marked as seen.
//
// NOTE(review): the metric names below spell "registrations" as
// "regisrations". They are left as-is here because renaming them would change
// the emitted metric keys.
func (c *ServiceClient) sync() error {
	// Counters for the debug log line at the end
	sreg, creg, sdereg, cdereg := 0, 0, 0, 0

	consulServices, err := c.client.Services()
	if err != nil {
		metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
		return fmt.Errorf("error querying Consul services: %v", err)
	}

	consulChecks, err := c.client.Checks()
	if err != nil {
		metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
		return fmt.Errorf("error querying Consul checks: %v", err)
	}

	// Remove Nomad services in Consul but unknown locally
	for id := range consulServices {
		if _, ok := c.services[id]; ok {
			// Known service, skip
			continue
		}
		if !isNomadService(id) {
			// Not managed by Nomad, skip
			continue
		}
		// Unknown Nomad managed service; kill
		if err := c.client.ServiceDeregister(id); err != nil {
			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		sdereg++
		metrics.IncrCounter([]string{"client", "consul", "service_deregisrations"}, 1)
	}

	// Track services whose ports have changed as their checks may also
	// need updating
	portsChanged := make(map[string]struct{}, len(c.services))

	// Add Nomad services missing from Consul
	for id, locals := range c.services {
		if remotes, ok := consulServices[id]; ok {
			// Make sure Port and Address are stable since
			// PortLabel and AddressMode aren't included in the
			// service ID.
			if locals.Port == remotes.Port && locals.Address == remotes.Address {
				// Already exists in Consul; skip
				continue
			}
			// Port changed, reregister it and its checks
			portsChanged[id] = struct{}{}
		}
		if err = c.client.ServiceRegister(locals); err != nil {
			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		sreg++
		metrics.IncrCounter([]string{"client", "consul", "service_regisrations"}, 1)
	}

	// Remove Nomad checks in Consul but unknown locally
	for id, check := range consulChecks {
		if _, ok := c.checks[id]; ok {
			// Known check, leave it
			continue
		}
		if !isNomadService(check.ServiceID) {
			// Service not managed by Nomad, skip
			continue
		}
		// Unknown Nomad managed check; kill
		if err := c.client.CheckDeregister(id); err != nil {
			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		cdereg++
		metrics.IncrCounter([]string{"client", "consul", "check_deregisrations"}, 1)
	}

	// Add Nomad checks missing from Consul
	for id, check := range c.checks {
		// Note: the check declared in the if statement below shadows the
		// loop variable and refers to Consul's view of the check; the
		// loop's own check (the local registration) is what gets
		// registered further down.
		if check, ok := consulChecks[id]; ok {
			if _, changed := portsChanged[check.ServiceID]; !changed {
				// Already in Consul and ports didn't change; skipping
				continue
			}
		}
		if err := c.client.CheckRegister(check); err != nil {
			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		creg++
		metrics.IncrCounter([]string{"client", "consul", "check_regisrations"}, 1)

		// Handle starting scripts
		if script, ok := c.scripts[id]; ok {
			// If it's already running, cancel and replace
			if oldScript, running := c.runningScripts[id]; running {
				oldScript.cancel()
			}
			// Start and store the handle
			c.runningScripts[id] = script.run()
		}
	}

	// A Consul operation has succeeded, mark Consul as having been seen
	c.markSeen()

	c.logger.Printf("[DEBUG] consul.sync: registered %d services, %d checks; deregistered %d services, %d checks",
		sreg, creg, sdereg, cdereg)
	return nil
}
   366  
   367  // RegisterAgent registers Nomad agents (client or server). The
   368  // Service.PortLabel should be a literal port to be parsed with SplitHostPort.
   369  // Script checks are not supported and will return an error. Registration is
   370  // asynchronous.
   371  //
   372  // Agents will be deregistered when Shutdown is called.
   373  func (c *ServiceClient) RegisterAgent(role string, services []*structs.Service) error {
   374  	ops := operations{}
   375  
   376  	for _, service := range services {
   377  		id := makeAgentServiceID(role, service)
   378  
   379  		// Unlike tasks, agents don't use port labels. Agent ports are
   380  		// stored directly in the PortLabel.
   381  		host, rawport, err := net.SplitHostPort(service.PortLabel)
   382  		if err != nil {
   383  			return fmt.Errorf("error parsing port label %q from service %q: %v", service.PortLabel, service.Name, err)
   384  		}
   385  		port, err := strconv.Atoi(rawport)
   386  		if err != nil {
   387  			return fmt.Errorf("error parsing port %q from service %q: %v", rawport, service.Name, err)
   388  		}
   389  		serviceReg := &api.AgentServiceRegistration{
   390  			ID:      id,
   391  			Name:    service.Name,
   392  			Tags:    service.Tags,
   393  			Address: host,
   394  			Port:    port,
   395  		}
   396  		ops.regServices = append(ops.regServices, serviceReg)
   397  
   398  		for _, check := range service.Checks {
   399  			checkID := makeCheckID(id, check)
   400  			if check.Type == structs.ServiceCheckScript {
   401  				return fmt.Errorf("service %q contains invalid check: agent checks do not support scripts", service.Name)
   402  			}
   403  			checkHost, checkPort := serviceReg.Address, serviceReg.Port
   404  			if check.PortLabel != "" {
   405  				// Unlike tasks, agents don't use port labels. Agent ports are
   406  				// stored directly in the PortLabel.
   407  				host, rawport, err := net.SplitHostPort(check.PortLabel)
   408  				if err != nil {
   409  					return fmt.Errorf("error parsing port label %q from check %q: %v", service.PortLabel, check.Name, err)
   410  				}
   411  				port, err := strconv.Atoi(rawport)
   412  				if err != nil {
   413  					return fmt.Errorf("error parsing port %q from check %q: %v", rawport, check.Name, err)
   414  				}
   415  				checkHost, checkPort = host, port
   416  			}
   417  			checkReg, err := createCheckReg(id, checkID, check, checkHost, checkPort)
   418  			if err != nil {
   419  				return fmt.Errorf("failed to add check %q: %v", check.Name, err)
   420  			}
   421  			ops.regChecks = append(ops.regChecks, checkReg)
   422  		}
   423  	}
   424  
   425  	// Don't bother committing agent checks if we're already shutting down
   426  	c.agentLock.Lock()
   427  	defer c.agentLock.Unlock()
   428  	select {
   429  	case <-c.shutdownCh:
   430  		return nil
   431  	default:
   432  	}
   433  
   434  	// Now add them to the registration queue
   435  	c.commit(&ops)
   436  
   437  	// Record IDs for deregistering on shutdown
   438  	for _, id := range ops.regServices {
   439  		c.agentServices[id.ID] = struct{}{}
   440  	}
   441  	for _, id := range ops.regChecks {
   442  		c.agentChecks[id.ID] = struct{}{}
   443  	}
   444  	return nil
   445  }
   446  
   447  // serviceRegs creates service registrations, check registrations, and script
   448  // checks from a service.
   449  func (c *ServiceClient) serviceRegs(ops *operations, allocID string, service *structs.Service,
   450  	task *structs.Task, exec driver.ScriptExecutor, net *cstructs.DriverNetwork) error {
   451  
   452  	id := makeTaskServiceID(allocID, task.Name, service)
   453  	addrMode := service.AddressMode
   454  	if addrMode == structs.AddressModeAuto {
   455  		if net.Advertise() {
   456  			addrMode = structs.AddressModeDriver
   457  		} else {
   458  			// No driver network or shouldn't default to driver's network
   459  			addrMode = structs.AddressModeHost
   460  		}
   461  	}
   462  	ip, port := task.Resources.Networks.Port(service.PortLabel)
   463  	if addrMode == structs.AddressModeDriver {
   464  		if net == nil {
   465  			return fmt.Errorf("service %s cannot use driver's IP because driver didn't set one", service.Name)
   466  		}
   467  		ip = net.IP
   468  		port = net.PortMap[service.PortLabel]
   469  	}
   470  	serviceReg := &api.AgentServiceRegistration{
   471  		ID:      id,
   472  		Name:    service.Name,
   473  		Tags:    make([]string, len(service.Tags)),
   474  		Address: ip,
   475  		Port:    port,
   476  	}
   477  	// copy isn't strictly necessary but can avoid bugs especially
   478  	// with tests that may reuse Tasks
   479  	copy(serviceReg.Tags, service.Tags)
   480  	ops.regServices = append(ops.regServices, serviceReg)
   481  	return c.checkRegs(ops, allocID, id, service, task, exec, net)
   482  }
   483  
   484  func (c *ServiceClient) checkRegs(ops *operations, allocID, serviceID string, service *structs.Service,
   485  	task *structs.Task, exec driver.ScriptExecutor, net *cstructs.DriverNetwork) error {
   486  
   487  	for _, check := range service.Checks {
   488  		if check.TLSSkipVerify && !c.skipVerifySupport {
   489  			c.logger.Printf("[WARN] consul.sync: skipping check %q for task %q alloc %q because Consul doesn't support tls_skip_verify. Please upgrade to Consul >= 0.7.2.",
   490  				check.Name, task.Name, allocID)
   491  			continue
   492  		}
   493  		checkID := makeCheckID(serviceID, check)
   494  		if check.Type == structs.ServiceCheckScript {
   495  			if exec == nil {
   496  				return fmt.Errorf("driver doesn't support script checks")
   497  			}
   498  			ops.scripts = append(ops.scripts, newScriptCheck(
   499  				allocID, task.Name, checkID, check, exec, c.client, c.logger, c.shutdownCh))
   500  
   501  		}
   502  
   503  		// Checks should always use the host ip:port
   504  		portLabel := check.PortLabel
   505  		if portLabel == "" {
   506  			// Default to the service's port label
   507  			portLabel = service.PortLabel
   508  		}
   509  		ip, port := task.Resources.Networks.Port(portLabel)
   510  		checkReg, err := createCheckReg(serviceID, checkID, check, ip, port)
   511  		if err != nil {
   512  			return fmt.Errorf("failed to add check %q: %v", check.Name, err)
   513  		}
   514  		ops.regChecks = append(ops.regChecks, checkReg)
   515  	}
   516  	return nil
   517  }
   518  
   519  // RegisterTask with Consul. Adds all sevice entries and checks to Consul. If
   520  // exec is nil and a script check exists an error is returned.
   521  //
   522  // If the service IP is set it used as the address in the service registration.
   523  // Checks will always use the IP from the Task struct (host's IP).
   524  //
   525  // Actual communication with Consul is done asynchrously (see Run).
   526  func (c *ServiceClient) RegisterTask(allocID string, task *structs.Task, exec driver.ScriptExecutor, net *cstructs.DriverNetwork) error {
   527  	ops := &operations{}
   528  	for _, service := range task.Services {
   529  		if err := c.serviceRegs(ops, allocID, service, task, exec, net); err != nil {
   530  			return err
   531  		}
   532  	}
   533  	c.commit(ops)
   534  	return nil
   535  }
   536  
   537  // UpdateTask in Consul. Does not alter the service if only checks have
   538  // changed.
   539  //
   540  // DriverNetwork must not change between invocations for the same allocation.
   541  func (c *ServiceClient) UpdateTask(allocID string, existing, newTask *structs.Task, exec driver.ScriptExecutor, net *cstructs.DriverNetwork) error {
   542  	ops := &operations{}
   543  
   544  	existingIDs := make(map[string]*structs.Service, len(existing.Services))
   545  	for _, s := range existing.Services {
   546  		existingIDs[makeTaskServiceID(allocID, existing.Name, s)] = s
   547  	}
   548  	newIDs := make(map[string]*structs.Service, len(newTask.Services))
   549  	for _, s := range newTask.Services {
   550  		newIDs[makeTaskServiceID(allocID, newTask.Name, s)] = s
   551  	}
   552  
   553  	// Loop over existing Service IDs to see if they have been removed or
   554  	// updated.
   555  	for existingID, existingSvc := range existingIDs {
   556  		newSvc, ok := newIDs[existingID]
   557  		if !ok {
   558  			// Existing sevice entry removed
   559  			ops.deregServices = append(ops.deregServices, existingID)
   560  			for _, check := range existingSvc.Checks {
   561  				ops.deregChecks = append(ops.deregChecks, makeCheckID(existingID, check))
   562  			}
   563  			continue
   564  		}
   565  
   566  		// PortLabel and AddressMode aren't included in the ID, so we
   567  		// have to compare manually.
   568  		serviceUnchanged := newSvc.PortLabel == existingSvc.PortLabel && newSvc.AddressMode == existingSvc.AddressMode
   569  		if serviceUnchanged {
   570  			// Service exists and hasn't changed, don't add it later
   571  			delete(newIDs, existingID)
   572  		}
   573  
   574  		// Check to see what checks were updated
   575  		existingChecks := make(map[string]struct{}, len(existingSvc.Checks))
   576  		for _, check := range existingSvc.Checks {
   577  			existingChecks[makeCheckID(existingID, check)] = struct{}{}
   578  		}
   579  
   580  		// Register new checks
   581  		for _, check := range newSvc.Checks {
   582  			checkID := makeCheckID(existingID, check)
   583  			if _, exists := existingChecks[checkID]; exists {
   584  				// Check exists, so don't remove it
   585  				delete(existingChecks, checkID)
   586  			} else if serviceUnchanged {
   587  				// New check on an unchanged service; add them now
   588  				err := c.checkRegs(ops, allocID, existingID, newSvc, newTask, exec, net)
   589  				if err != nil {
   590  					return err
   591  				}
   592  			}
   593  		}
   594  
   595  		// Remove existing checks not in updated service
   596  		for cid := range existingChecks {
   597  			ops.deregChecks = append(ops.deregChecks, cid)
   598  		}
   599  	}
   600  
   601  	// Any remaining services should just be enqueued directly
   602  	for _, newSvc := range newIDs {
   603  		err := c.serviceRegs(ops, allocID, newSvc, newTask, exec, net)
   604  		if err != nil {
   605  			return err
   606  		}
   607  	}
   608  
   609  	c.commit(ops)
   610  	return nil
   611  }
   612  
   613  // RemoveTask from Consul. Removes all service entries and checks.
   614  //
   615  // Actual communication with Consul is done asynchrously (see Run).
   616  func (c *ServiceClient) RemoveTask(allocID string, task *structs.Task) {
   617  	ops := operations{}
   618  
   619  	for _, service := range task.Services {
   620  		id := makeTaskServiceID(allocID, task.Name, service)
   621  		ops.deregServices = append(ops.deregServices, id)
   622  
   623  		for _, check := range service.Checks {
   624  			ops.deregChecks = append(ops.deregChecks, makeCheckID(id, check))
   625  		}
   626  	}
   627  
   628  	// Now add them to the deregistration fields; main Run loop will update
   629  	c.commit(&ops)
   630  }
   631  
   632  // Checks returns the checks registered against the agent for the given
   633  // allocation.
   634  func (c *ServiceClient) Checks(a *structs.Allocation) ([]*api.AgentCheck, error) {
   635  	tg := a.Job.LookupTaskGroup(a.TaskGroup)
   636  	if tg == nil {
   637  		return nil, fmt.Errorf("failed to find task group in alloc")
   638  	}
   639  
   640  	// Determine the checks that are relevant
   641  	relevant := make(map[string]struct{}, 4)
   642  	for _, task := range tg.Tasks {
   643  		for _, service := range task.Services {
   644  			id := makeTaskServiceID(a.ID, task.Name, service)
   645  			for _, check := range service.Checks {
   646  				relevant[makeCheckID(id, check)] = struct{}{}
   647  			}
   648  		}
   649  	}
   650  
   651  	// Query all the checks
   652  	checks, err := c.client.Checks()
   653  	if err != nil {
   654  		return nil, err
   655  	}
   656  
   657  	allocChecks := make([]*api.AgentCheck, 0, len(relevant))
   658  	for checkID := range relevant {
   659  		if check, ok := checks[checkID]; ok {
   660  			allocChecks = append(allocChecks, check)
   661  		}
   662  	}
   663  
   664  	return allocChecks, nil
   665  }
   666  
   667  // Shutdown the Consul client. Update running task registations and deregister
   668  // agent from Consul. On first call blocks up to shutdownWait before giving up
   669  // on syncing operations.
   670  func (c *ServiceClient) Shutdown() error {
   671  	// Serialize Shutdown calls with RegisterAgent to prevent leaking agent
   672  	// entries.
   673  	c.agentLock.Lock()
   674  	defer c.agentLock.Unlock()
   675  	select {
   676  	case <-c.shutdownCh:
   677  		return nil
   678  	default:
   679  		close(c.shutdownCh)
   680  	}
   681  
   682  	// Give run loop time to sync, but don't block indefinitely
   683  	deadline := time.After(c.shutdownWait)
   684  
   685  	// Wait for Run to finish any outstanding operations and exit
   686  	select {
   687  	case <-c.exitCh:
   688  	case <-deadline:
   689  		// Don't wait forever though
   690  	}
   691  
   692  	// If Consul was never seen nothing could be written so exit early
   693  	if !c.hasSeen() {
   694  		return nil
   695  	}
   696  
   697  	// Always attempt to deregister Nomad agent Consul entries, even if
   698  	// deadline was reached
   699  	for id := range c.agentServices {
   700  		if err := c.client.ServiceDeregister(id); err != nil {
   701  			c.logger.Printf("[ERR] consul.sync: error deregistering agent service (id: %q): %v", id, err)
   702  		}
   703  	}
   704  	for id := range c.agentChecks {
   705  		if err := c.client.CheckDeregister(id); err != nil {
   706  			c.logger.Printf("[ERR] consul.sync: error deregistering agent service (id: %q): %v", id, err)
   707  		}
   708  	}
   709  
   710  	// Give script checks time to exit (no need to lock as Run() has exited)
   711  	for _, h := range c.runningScripts {
   712  		select {
   713  		case <-h.wait():
   714  		case <-deadline:
   715  			return fmt.Errorf("timed out waiting for script checks to run")
   716  		}
   717  	}
   718  	return nil
   719  }
   720  
   721  // makeAgentServiceID creates a unique ID for identifying an agent service in
   722  // Consul.
   723  //
   724  // Agent service IDs are of the form:
   725  //
   726  //	{nomadServicePrefix}-{ROLE}-{Service.Name}-{Service.Tags...}
   727  //	Example Server ID: _nomad-server-nomad-serf
   728  //	Example Client ID: _nomad-client-nomad-client-http
   729  //
   730  func makeAgentServiceID(role string, service *structs.Service) string {
   731  	parts := make([]string, len(service.Tags)+3)
   732  	parts[0] = nomadServicePrefix
   733  	parts[1] = role
   734  	parts[2] = service.Name
   735  	copy(parts[3:], service.Tags)
   736  	return strings.Join(parts, "-")
   737  }
   738  
   739  // makeTaskServiceID creates a unique ID for identifying a task service in
   740  // Consul.
   741  //
   742  // Task service IDs are of the form:
   743  //
   744  //	{nomadServicePrefix}-executor-{ALLOC_ID}-{Service.Name}-{Service.Tags...}
   745  //	Example Service ID: _nomad-executor-1234-echo-http-tag1-tag2-tag3
   746  //
   747  func makeTaskServiceID(allocID, taskName string, service *structs.Service) string {
   748  	parts := make([]string, len(service.Tags)+5)
   749  	parts[0] = nomadServicePrefix
   750  	parts[1] = "executor"
   751  	parts[2] = allocID
   752  	parts[3] = taskName
   753  	parts[4] = service.Name
   754  	copy(parts[5:], service.Tags)
   755  	return strings.Join(parts, "-")
   756  }
   757  
   758  // makeCheckID creates a unique ID for a check.
   759  func makeCheckID(serviceID string, check *structs.ServiceCheck) string {
   760  	return check.Hash(serviceID)
   761  }
   762  
   763  // createCheckReg creates a Check that can be registered with Consul.
   764  //
   765  // Script checks simply have a TTL set and the caller is responsible for
   766  // running the script and heartbeating.
   767  func createCheckReg(serviceID, checkID string, check *structs.ServiceCheck, host string, port int) (*api.AgentCheckRegistration, error) {
   768  	chkReg := api.AgentCheckRegistration{
   769  		ID:        checkID,
   770  		Name:      check.Name,
   771  		ServiceID: serviceID,
   772  	}
   773  	chkReg.Status = check.InitialStatus
   774  	chkReg.Timeout = check.Timeout.String()
   775  	chkReg.Interval = check.Interval.String()
   776  
   777  	switch check.Type {
   778  	case structs.ServiceCheckHTTP:
   779  		proto := check.Protocol
   780  		if proto == "" {
   781  			proto = "http"
   782  		}
   783  		if check.TLSSkipVerify {
   784  			chkReg.TLSSkipVerify = true
   785  		}
   786  		base := url.URL{
   787  			Scheme: proto,
   788  			Host:   net.JoinHostPort(host, strconv.Itoa(port)),
   789  		}
   790  		relative, err := url.Parse(check.Path)
   791  		if err != nil {
   792  			return nil, err
   793  		}
   794  		url := base.ResolveReference(relative)
   795  		chkReg.HTTP = url.String()
   796  	case structs.ServiceCheckTCP:
   797  		chkReg.TCP = net.JoinHostPort(host, strconv.Itoa(port))
   798  	case structs.ServiceCheckScript:
   799  		chkReg.TTL = (check.Interval + ttlCheckBuffer).String()
   800  	default:
   801  		return nil, fmt.Errorf("check type %+q not valid", check.Type)
   802  	}
   803  	return &chkReg, nil
   804  }
   805  
   806  // isNomadService returns true if the ID matches the pattern of a Nomad managed
   807  // service. Agent services return false as independent client and server agents
   808  // may be running on the same machine. #2827
   809  func isNomadService(id string) bool {
   810  	const prefix = nomadServicePrefix + "-executor"
   811  	return strings.HasPrefix(id, prefix)
   812  }