github.com/adityamillind98/nomad@v0.11.8/command/agent/consul/client.go (about)

     1  package consul
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"net"
     7  	"net/url"
     8  	"reflect"
     9  	"strconv"
    10  	"strings"
    11  	"sync"
    12  	"sync/atomic"
    13  	"time"
    14  
    15  	metrics "github.com/armon/go-metrics"
    16  	log "github.com/hashicorp/go-hclog"
    17  
    18  	"github.com/hashicorp/consul/api"
    19  	"github.com/hashicorp/nomad/helper"
    20  	"github.com/hashicorp/nomad/nomad/structs"
    21  	"github.com/hashicorp/nomad/plugins/drivers"
    22  )
    23  
const (
	// nomadServicePrefix is the prefix that scopes all Nomad registered
	// services (both agent and task entries).
	nomadServicePrefix = "_nomad"

	// nomadTaskPrefix is the prefix that scopes Nomad registered services
	// for tasks.
	nomadTaskPrefix = nomadServicePrefix + "-task-"

	// nomadCheckPrefix is the prefix that scopes Nomad registered checks for
	// services.
	nomadCheckPrefix = nomadServicePrefix + "-check-"

	// defaultRetryInterval is how quickly to retry syncing services and
	// checks to Consul when an error occurs. Will back off up to a max.
	defaultRetryInterval = time.Second

	// defaultMaxRetryInterval is the default max retry interval.
	defaultMaxRetryInterval = 30 * time.Second

	// defaultPeriodicInterval is the interval at which the service
	// client reconciles state between the desired services and checks and
	// what's actually registered in Consul. This is done at an interval,
	// rather than being purely edge triggered, to handle the case that the
	// Consul agent's state may change underneath us.
	defaultPeriodicInterval = 30 * time.Second

	// ttlCheckBuffer is the extra time Nomad is allowed to take to report
	// a check result to Consul.
	ttlCheckBuffer = 31 * time.Second

	// defaultShutdownWait is how long Shutdown() should block waiting for
	// enqueued operations to sync to Consul by default.
	defaultShutdownWait = time.Minute

	// DefaultQueryWaitDuration is the max duration the Consul Agent will
	// spend waiting for a response from a Consul Query.
	DefaultQueryWaitDuration = 2 * time.Second

	// ServiceTagHTTP is the tag assigned to HTTP services
	ServiceTagHTTP = "http"

	// ServiceTagRPC is the tag assigned to RPC services
	ServiceTagRPC = "rpc"

	// ServiceTagSerf is the tag assigned to Serf services
	ServiceTagSerf = "serf"

	// deregisterProbationPeriod is the initialization period during which
	// services registered in Consul but not known to Nomad are not
	// deregistered, to allow for Nomad restoring tasks.
	deregisterProbationPeriod = time.Minute
)
    77  
    78  // Additional Consul ACLs required
    79  // - Consul Template: key:read
    80  //   Used in tasks with template stanza that use Consul keys.
    81  
// CatalogAPI is the subset of the consul/api.Catalog API used by Nomad.
// Declaring it as an interface lets callers depend only on the methods
// listed here.
//
// ACL requirements
// - node:read (listing datacenters)
// - service:read
type CatalogAPI interface {
	// Datacenters lists datacenters; requires the node:read ACL.
	Datacenters() ([]string, error)
	// Service looks up catalog entries by service name and tag; requires
	// the service:read ACL.
	Service(service, tag string, q *api.QueryOptions) ([]*api.CatalogService, *api.QueryMeta, error)
}
    91  
// AgentAPI is the subset of the consul/api.Agent API used by Nomad.
//
// Within this file, ServiceClient.sync reads the agent's current state via
// Services and Checks and then reconciles it with the Register/Deregister
// methods.
//
// ACL requirements
// - agent:read
// - service:write
type AgentAPI interface {
	// Services and Checks return the agent's registrations keyed by ID.
	Services() (map[string]*api.AgentService, error)
	Checks() (map[string]*api.AgentCheck, error)
	CheckRegister(check *api.AgentCheckRegistration) error
	CheckDeregister(checkID string) error
	Self() (map[string]map[string]interface{}, error)
	ServiceRegister(service *api.AgentServiceRegistration) error
	ServiceDeregister(serviceID string) error
	UpdateTTL(id, output, status string) error
}
   107  
// ACLsAPI is the subset of the consul/api.ACL API used by the Nomad Server.
//
// ACL requirements
// - acl:write (server only)
type ACLsAPI interface {
	// TokenReadSelf is used because tokens are looked up by the [operator
	// token] SecretID; the normal TokenRead can only look up tokens by
	// their AccessorID.
	TokenReadSelf(q *api.QueryOptions) (*api.ACLToken, *api.QueryMeta, error)
	PolicyRead(policyID string, q *api.QueryOptions) (*api.ACLPolicy, *api.QueryMeta, error)
	RoleRead(roleID string, q *api.QueryOptions) (*api.ACLRole, *api.QueryMeta, error)
	TokenCreate(partial *api.ACLToken, q *api.WriteOptions) (*api.ACLToken, *api.WriteMeta, error)
	TokenDelete(accessorID string, q *api.WriteOptions) (*api.WriteMeta, error)
	TokenList(q *api.QueryOptions) ([]*api.ACLTokenListEntry, *api.QueryMeta, error)
}
   123  
   124  // agentServiceUpdateRequired checks if any critical fields in Nomad's version
   125  // of a service definition are different from the existing service definition as
   126  // known by Consul.
   127  //
   128  //  reason - The syncReason that triggered this synchronization with the consul
   129  //           agent API.
   130  //  wanted - Nomad's view of what the service definition is intended to be.
   131  //           Not nil.
   132  //  existing - Consul's view (agent, not catalog) of the actual service definition.
   133  //           Not nil.
   134  //  sidecar - Consul's view (agent, not catalog) of the service definition of the sidecar
   135  //           associated with existing that may or may not exist.
   136  //           May be nil.
   137  func agentServiceUpdateRequired(reason syncReason, wanted *api.AgentServiceRegistration, existing *api.AgentService, sidecar *api.AgentService) bool {
   138  	switch reason {
   139  	case syncPeriodic:
   140  		// In a periodic sync with Consul, we need to respect the value of
   141  		// the enable_tag_override field so that we maintain the illusion that the
   142  		// user is in control of the Consul tags, as they may be externally edited
   143  		// via the Consul catalog API (e.g. a user manually sets them).
   144  		//
   145  		// As Consul does by disabling anti-entropy for the tags field, Nomad will
   146  		// ignore differences in the tags field during the periodic syncs with
   147  		// the Consul agent API.
   148  		//
   149  		// We do so by over-writing the nomad service registration by the value
   150  		// of the tags that Consul contains, if enable_tag_override = true.
   151  		maybeTweakTags(wanted, existing, sidecar)
   152  		return different(wanted, existing, sidecar)
   153  
   154  	default:
   155  		// A non-periodic sync with Consul indicates an operation has been set
   156  		// on the queue. This happens when service has been added / removed / modified
   157  		// and implies the Consul agent should be sync'd with nomad, because
   158  		// nomad is the ultimate source of truth for the service definition.
   159  		return different(wanted, existing, sidecar)
   160  	}
   161  }
   162  
   163  // maybeTweakTags will override wanted.Tags with a copy of existing.Tags only if
   164  // EnableTagOverride is true. Otherwise the wanted service registration is left
   165  // unchanged.
   166  func maybeTweakTags(wanted *api.AgentServiceRegistration, existing *api.AgentService, sidecar *api.AgentService) {
   167  	if wanted.EnableTagOverride {
   168  		wanted.Tags = helper.CopySliceString(existing.Tags)
   169  		// If the service registration also defines a sidecar service, use the ETO
   170  		// setting for the parent service to also apply to the sidecar.
   171  		if wanted.Connect != nil && wanted.Connect.SidecarService != nil {
   172  			if sidecar != nil {
   173  				wanted.Connect.SidecarService.Tags = helper.CopySliceString(sidecar.Tags)
   174  			}
   175  		}
   176  	}
   177  }
   178  
   179  // different compares the wanted state of the service registration with the actual
   180  // (cached) state of the service registration reported by Consul. If any of the
   181  // critical fields are not deeply equal, they considered different.
   182  func different(wanted *api.AgentServiceRegistration, existing *api.AgentService, sidecar *api.AgentService) bool {
   183  
   184  	return !(wanted.Kind == existing.Kind &&
   185  		wanted.ID == existing.ID &&
   186  		wanted.Port == existing.Port &&
   187  		wanted.Address == existing.Address &&
   188  		wanted.Name == existing.Service &&
   189  		wanted.EnableTagOverride == existing.EnableTagOverride &&
   190  		reflect.DeepEqual(wanted.Meta, existing.Meta) &&
   191  		reflect.DeepEqual(wanted.Tags, existing.Tags) &&
   192  		!connectSidecarDifferent(wanted, sidecar))
   193  }
   194  
   195  func connectSidecarDifferent(wanted *api.AgentServiceRegistration, sidecar *api.AgentService) bool {
   196  	if wanted.Connect != nil && wanted.Connect.SidecarService != nil {
   197  		if sidecar == nil {
   198  			// consul lost our sidecar (?)
   199  			return true
   200  		}
   201  		if !reflect.DeepEqual(wanted.Connect.SidecarService.Tags, sidecar.Tags) {
   202  			// tags on the nomad definition have been modified
   203  			return true
   204  		}
   205  	}
   206  
   207  	// There is no connect sidecar the nomad side; let consul anti-entropy worry
   208  	// about any registration on the consul side.
   209  	return false
   210  }
   211  
// operations are submitted to the main loop via commit() for synchronizing
// with Consul. A single value batches registrations and deregistrations;
// merge() applies the registrations first and the deregistrations second.
type operations struct {
	// regServices and regChecks are registrations to add to the desired
	// state, keyed there by their ID field.
	regServices   []*api.AgentServiceRegistration
	regChecks     []*api.AgentCheckRegistration
	// deregServices and deregChecks are the IDs of services and checks to
	// remove from the desired state.
	deregServices []string
	deregChecks   []string
}
   220  
// AllocRegistration holds the status of the services registered for a
// particular allocation, grouped by task.
type AllocRegistration struct {
	// Tasks maps the name of a task to its registered services and checks
	Tasks map[string]*ServiceRegistrations
}
   227  
   228  func (a *AllocRegistration) copy() *AllocRegistration {
   229  	c := &AllocRegistration{
   230  		Tasks: make(map[string]*ServiceRegistrations, len(a.Tasks)),
   231  	}
   232  
   233  	for k, v := range a.Tasks {
   234  		c.Tasks[k] = v.copy()
   235  	}
   236  
   237  	return c
   238  }
   239  
   240  // NumServices returns the number of registered services
   241  func (a *AllocRegistration) NumServices() int {
   242  	if a == nil {
   243  		return 0
   244  	}
   245  
   246  	total := 0
   247  	for _, treg := range a.Tasks {
   248  		for _, sreg := range treg.Services {
   249  			if sreg.Service != nil {
   250  				total++
   251  			}
   252  		}
   253  	}
   254  
   255  	return total
   256  }
   257  
   258  // NumChecks returns the number of registered checks
   259  func (a *AllocRegistration) NumChecks() int {
   260  	if a == nil {
   261  		return 0
   262  	}
   263  
   264  	total := 0
   265  	for _, treg := range a.Tasks {
   266  		for _, sreg := range treg.Services {
   267  			total += len(sreg.Checks)
   268  		}
   269  	}
   270  
   271  	return total
   272  }
   273  
// ServiceRegistrations holds the status of services registered for a particular
// task or task group.
type ServiceRegistrations struct {
	// Services holds one ServiceRegistration per registered service.
	Services map[string]*ServiceRegistration
}
   279  
   280  func (t *ServiceRegistrations) copy() *ServiceRegistrations {
   281  	c := &ServiceRegistrations{
   282  		Services: make(map[string]*ServiceRegistration, len(t.Services)),
   283  	}
   284  
   285  	for k, v := range t.Services {
   286  		c.Services[k] = v.copy()
   287  	}
   288  
   289  	return c
   290  }
   291  
// ServiceRegistration holds the status of a registered Consul Service and its
// Checks.
type ServiceRegistration struct {
	// serviceID and checkIDs are internal fields that track just the IDs of the
	// services/checks registered in Consul. They are used to materialize the
	// other fields when queried.
	serviceID string
	checkIDs  map[string]struct{}

	// Service is the AgentService registered in Consul.
	Service *api.AgentService

	// Checks is the status of the registered checks.
	Checks []*api.AgentCheck
}
   307  
   308  func (s *ServiceRegistration) copy() *ServiceRegistration {
   309  	// Copy does not copy the external fields but only the internal fields. This
   310  	// is so that the caller of AllocRegistrations can not access the internal
   311  	// fields and that method uses these fields to populate the external fields.
   312  	return &ServiceRegistration{
   313  		serviceID: s.serviceID,
   314  		checkIDs:  helper.CopyMapStringStruct(s.checkIDs),
   315  	}
   316  }
   317  
// ServiceClient handles task and agent service registration with Consul.
type ServiceClient struct {
	client           AgentAPI
	logger           log.Logger
	// retryInterval and maxRetryInterval control the backoff applied by Run
	// after a failed sync; periodicInterval is the delay between
	// reconciliations after a successful one.
	retryInterval    time.Duration
	maxRetryInterval time.Duration
	periodicInterval time.Duration

	// exitCh is closed when the main Run loop exits
	exitCh chan struct{}

	// shutdownCh is closed when the client should shutdown
	shutdownCh chan struct{}

	// shutdownWait is how long Shutdown() blocks waiting for the final
	// sync() to finish. Defaults to defaultShutdownWait
	shutdownWait time.Duration

	// opCh carries batches of operations from commit() to the Run loop.
	opCh chan *operations

	// services and checks are the desired registrations keyed by ID. They
	// are updated by merge() and compared against Consul's state by sync().
	services map[string]*api.AgentServiceRegistration
	checks   map[string]*api.AgentCheckRegistration

	// explicitlyDeregisteredServices and explicitlyDeregisteredChecks record
	// IDs removed through merge() since the last successful sync. sync()
	// deregisters these from Consul even during the probation period.
	explicitlyDeregisteredServices map[string]bool
	explicitlyDeregisteredChecks   map[string]bool

	// allocRegistrations stores the services and checks that are registered
	// with Consul by allocation ID.
	allocRegistrations     map[string]*AllocRegistration
	allocRegistrationsLock sync.RWMutex

	// agent services and checks record entries for the agent itself which
	// should be removed on shutdown
	agentServices map[string]struct{}
	agentChecks   map[string]struct{}
	agentLock     sync.Mutex

	// seen is 1 if Consul has ever been seen; otherwise 0. Accessed with
	// atomics.
	seen int32

	// deregisterProbationExpiry is the time before which consul sync shouldn't deregister
	// unknown services.
	// Used to mitigate risk of deleting restored services upon client restart.
	deregisterProbationExpiry time.Time

	// checkWatcher restarts checks that are unhealthy.
	checkWatcher *checkWatcher

	// isClientAgent specifies whether this Consul client is being used
	// by a Nomad client.
	isClientAgent bool
}
   371  
   372  // NewServiceClient creates a new Consul ServiceClient from an existing Consul API
   373  // Client, logger and takes whether the client is being used by a Nomad Client agent.
   374  // When being used by a Nomad client, this Consul client reconciles all services and
   375  // checks created by Nomad on behalf of running tasks.
   376  func NewServiceClient(consulClient AgentAPI, logger log.Logger, isNomadClient bool) *ServiceClient {
   377  	logger = logger.ResetNamed("consul.sync")
   378  	return &ServiceClient{
   379  		client:                         consulClient,
   380  		logger:                         logger,
   381  		retryInterval:                  defaultRetryInterval,
   382  		maxRetryInterval:               defaultMaxRetryInterval,
   383  		periodicInterval:               defaultPeriodicInterval,
   384  		exitCh:                         make(chan struct{}),
   385  		shutdownCh:                     make(chan struct{}),
   386  		shutdownWait:                   defaultShutdownWait,
   387  		opCh:                           make(chan *operations, 8),
   388  		services:                       make(map[string]*api.AgentServiceRegistration),
   389  		checks:                         make(map[string]*api.AgentCheckRegistration),
   390  		explicitlyDeregisteredServices: make(map[string]bool),
   391  		explicitlyDeregisteredChecks:   make(map[string]bool),
   392  		allocRegistrations:             make(map[string]*AllocRegistration),
   393  		agentServices:                  make(map[string]struct{}),
   394  		agentChecks:                    make(map[string]struct{}),
   395  		checkWatcher:                   newCheckWatcher(logger, consulClient),
   396  		isClientAgent:                  isNomadClient,
   397  		deregisterProbationExpiry:      time.Now().Add(deregisterProbationPeriod),
   398  	}
   399  }
   400  
// seen is the value markSeen stores in ServiceClient.seen and hasSeen
// compares against.
const seen = 1
   403  
   404  // markSeen marks Consul as having been seen (meaning at least one operation
   405  // has succeeded).
   406  func (c *ServiceClient) markSeen() {
   407  	atomic.StoreInt32(&c.seen, seen)
   408  }
   409  
   410  // hasSeen returns true if any Consul operation has ever succeeded. Useful to
   411  // squelch errors if Consul isn't running.
   412  func (c *ServiceClient) hasSeen() bool {
   413  	return atomic.LoadInt32(&c.seen) == seen
   414  }
   415  
   416  // syncReason indicates why a sync operation with consul is about to happen.
   417  //
   418  // The trigger for a sync may have implications on the behavior of the sync itself.
   419  // In particular if a service is defined with enable_tag_override=true, the sync
   420  // should ignore changes to the service's Tags field.
   421  type syncReason byte
   422  
   423  const (
   424  	syncPeriodic = iota
   425  	syncShutdown
   426  	syncNewOps
   427  )
   428  
// Run the Consul main loop which retries operations against Consul. It should
// be called exactly once.
//
// The loop has two phases. Until initial contact with Consul is signalled,
// committed operations are only merged into the desired state. Afterwards
// every timer tick, shutdown signal or committed operation triggers a sync.
// A failed sync is retried with a backoff that grows with the number of
// consecutive failures up to maxRetryInterval; a successful sync schedules
// the next reconciliation after periodicInterval.
//
// exitCh is closed when Run returns.
func (c *ServiceClient) Run() {
	defer close(c.exitCh)

	// ctx is handed to the background goroutines started below; it is
	// cancelled on shutdown and when Run returns.
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// init will be closed when Consul has been contacted
	init := make(chan struct{})
	go checkConsulTLSSkipVerify(ctx, c.logger, c.client, init)

	// Process operations while waiting for initial contact with Consul but
	// do not sync until contact has been made.
INIT:
	for {
		select {
		case <-init:
			c.markSeen()
			break INIT
		case <-c.shutdownCh:
			// Shut down before Consul was ever contacted: exit without
			// syncing.
			return
		case ops := <-c.opCh:
			c.merge(ops)
		}
	}
	c.logger.Trace("able to contact Consul")

	// Contact with Consul has been established, so start the checkWatcher.
	go c.checkWatcher.Run(ctx)

	// Always immediately sync to reconcile Nomad and Consul's state
	retryTimer := time.NewTimer(0)

	// failures counts consecutive failed syncs; it drives both the backoff
	// and the log throttling below.
	failures := 0
	for {
		// On every iteration take note of what the trigger for the next sync
		// was, so that it may be referenced during the sync itself.
		var reasonForSync syncReason

		select {
		case <-retryTimer.C:
			reasonForSync = syncPeriodic
		case <-c.shutdownCh:
			reasonForSync = syncShutdown
			// Cancel check watcher but sync one last time
			cancel()
		case ops := <-c.opCh:
			reasonForSync = syncNewOps
			c.merge(ops)
		}

		if err := c.sync(reasonForSync); err != nil {
			if failures == 0 {
				// Log on the first failure
				c.logger.Warn("failed to update services in Consul", "error", err)
			} else if failures%10 == 0 {
				// Log every 10th consecutive failure
				c.logger.Error("still unable to update services in Consul", "failures", failures, "error", err)
			}

			failures++
			if !retryTimer.Stop() {
				// Timer already expired, since the timer may
				// or may not have been read in the select{}
				// above, conditionally receive on it
				select {
				case <-retryTimer.C:
				default:
				}
			}
			// The backoff grows by retryInterval per consecutive failure,
			// capped at maxRetryInterval.
			backoff := c.retryInterval * time.Duration(failures)
			if backoff > c.maxRetryInterval {
				backoff = c.maxRetryInterval
			}
			retryTimer.Reset(backoff)
		} else {
			if failures > 0 {
				c.logger.Info("successfully updated services in Consul")
				failures = 0
			}

			// on successful sync, clear deregistered consul entities
			c.clearExplicitlyDeregistered()

			// Reset timer to periodic interval to periodically
			// reconcile with Consul
			if !retryTimer.Stop() {
				// Same conditional drain as in the failure path.
				select {
				case <-retryTimer.C:
				default:
				}
			}
			retryTimer.Reset(c.periodicInterval)
		}

		// Non-blocking shutdown check. The result of the sync above is not
		// consulted here, so Run exits even if the last sync failed.
		select {
		case <-c.shutdownCh:
			// Exit only after sync'ing all outstanding operations
			if len(c.opCh) > 0 {
				for len(c.opCh) > 0 {
					c.merge(<-c.opCh)
				}
				continue
			}
			return
		default:
		}

	}
}
   541  
   542  // commit operations unless already shutting down.
   543  func (c *ServiceClient) commit(ops *operations) {
   544  	select {
   545  	case c.opCh <- ops:
   546  	case <-c.shutdownCh:
   547  	}
   548  }
   549  
   550  func (c *ServiceClient) clearExplicitlyDeregistered() {
   551  	c.explicitlyDeregisteredServices = map[string]bool{}
   552  	c.explicitlyDeregisteredChecks = map[string]bool{}
   553  }
   554  
   555  // merge registrations into state map prior to sync'ing with Consul
   556  func (c *ServiceClient) merge(ops *operations) {
   557  	for _, s := range ops.regServices {
   558  		c.services[s.ID] = s
   559  	}
   560  	for _, check := range ops.regChecks {
   561  		c.checks[check.ID] = check
   562  	}
   563  	for _, sid := range ops.deregServices {
   564  		delete(c.services, sid)
   565  		c.explicitlyDeregisteredServices[sid] = true
   566  	}
   567  	for _, cid := range ops.deregChecks {
   568  		delete(c.checks, cid)
   569  		c.explicitlyDeregisteredChecks[cid] = true
   570  	}
   571  	metrics.SetGauge([]string{"client", "consul", "services"}, float32(len(c.services)))
   572  	metrics.SetGauge([]string{"client", "consul", "checks"}, float32(len(c.checks)))
   573  }
   574  
// sync reconciles the desired services and checks with what the Consul agent
// currently has registered. It runs four passes in order: deregister unknown
// Nomad services, register missing or changed services, deregister unknown
// Nomad checks, register missing checks.
//
// reason is forwarded to agentServiceUpdateRequired to decide whether an
// already-registered service needs to be registered again.
//
// The first failing Consul call aborts the sync and its error is returned,
// except for deregistration failures on old-style Nomad entries, which are
// skipped. Every aborting failure increments the sync_failure counter.
func (c *ServiceClient) sync(reason syncReason) error {
	// Counters for the summary log line at the end.
	sreg, creg, sdereg, cdereg := 0, 0, 0, 0

	consulServices, err := c.client.Services()
	if err != nil {
		metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
		return fmt.Errorf("error querying Consul services: %v", err)
	}

	consulChecks, err := c.client.Checks()
	if err != nil {
		metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
		return fmt.Errorf("error querying Consul checks: %v", err)
	}

	// While in probation, unknown entries are only removed if they were
	// explicitly deregistered.
	inProbation := time.Now().Before(c.deregisterProbationExpiry)

	// Remove Nomad services in Consul but unknown locally
	for id := range consulServices {
		if _, ok := c.services[id]; ok {
			// Known service, skip
			continue
		}

		// Ignore if this is not a Nomad managed service. Also ignore
		// Nomad managed services if this is not a client agent.
		// This is to prevent server agents from removing services
		// registered by client agents
		if !isNomadService(id) || !c.isClientAgent {
			// Not managed by Nomad, skip
			continue
		}

		// Ignore unknown services during probation
		if inProbation && !c.explicitlyDeregisteredServices[id] {
			continue
		}

		// Ignore if this is a service for a Nomad managed sidecar proxy.
		if isNomadSidecar(id, c.services) {
			continue
		}

		// Unknown Nomad managed service; kill
		if err := c.client.ServiceDeregister(id); err != nil {
			if isOldNomadService(id) {
				// Don't hard-fail on old entries. See #3620
				continue
			}

			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		sdereg++
		metrics.IncrCounter([]string{"client", "consul", "service_deregistrations"}, 1)
	}

	// Add Nomad services missing from Consul, or where the service has been updated.
	for id, serviceInNomad := range c.services {

		serviceInConsul, exists := consulServices[id]
		sidecarInConsul := getNomadSidecar(id, consulServices)

		// The comparison only runs when the service exists in Consul, so
		// serviceInConsul is non-nil whenever it is inspected.
		if !exists || agentServiceUpdateRequired(reason, serviceInNomad, serviceInConsul, sidecarInConsul) {
			if err = c.client.ServiceRegister(serviceInNomad); err != nil {
				metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
				return err
			}
			sreg++
			metrics.IncrCounter([]string{"client", "consul", "service_registrations"}, 1)
		}

	}

	// Remove Nomad checks in Consul but unknown locally
	for id, check := range consulChecks {
		if _, ok := c.checks[id]; ok {
			// Known check, leave it
			continue
		}

		// Ignore if this is not a Nomad managed check. Also ignore
		// Nomad managed checks if this is not a client agent.
		// This is to prevent server agents from removing checks
		// registered by client agents
		if !isNomadService(check.ServiceID) || !c.isClientAgent || !isNomadCheck(check.CheckID) {
			// Check not managed by Nomad, skip
			continue
		}

		// Ignore unknown checks during probation
		if inProbation && !c.explicitlyDeregisteredChecks[id] {
			continue
		}

		// Ignore if this is a check for a Nomad managed sidecar proxy.
		if isNomadSidecar(check.ServiceID, c.services) {
			continue
		}

		// Unknown Nomad managed check; remove
		if err := c.client.CheckDeregister(id); err != nil {
			if isOldNomadService(check.ServiceID) {
				// Don't hard-fail on old entries.
				continue
			}

			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		cdereg++
		metrics.IncrCounter([]string{"client", "consul", "check_deregistrations"}, 1)
	}

	// Add Nomad checks missing from Consul. Checks that already exist in
	// Consul are not compared or updated here.
	for id, check := range c.checks {
		if _, ok := consulChecks[id]; ok {
			// Already in Consul; skipping
			continue
		}

		if err := c.client.CheckRegister(check); err != nil {
			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		creg++
		metrics.IncrCounter([]string{"client", "consul", "check_registrations"}, 1)
	}

	// Only log if something was actually synced
	if sreg > 0 || sdereg > 0 || creg > 0 || cdereg > 0 {
		c.logger.Debug("sync complete", "registered_services", sreg, "deregistered_services", sdereg,
			"registered_checks", creg, "deregistered_checks", cdereg)
	}
	return nil
}
   712  
   713  // RegisterAgent registers Nomad agents (client or server). The
   714  // Service.PortLabel should be a literal port to be parsed with SplitHostPort.
   715  // Script checks are not supported and will return an error. Registration is
   716  // asynchronous.
   717  //
   718  // Agents will be deregistered when Shutdown is called.
   719  func (c *ServiceClient) RegisterAgent(role string, services []*structs.Service) error {
   720  	ops := operations{}
   721  
   722  	for _, service := range services {
   723  		id := makeAgentServiceID(role, service)
   724  
   725  		// Unlike tasks, agents don't use port labels. Agent ports are
   726  		// stored directly in the PortLabel.
   727  		host, rawport, err := net.SplitHostPort(service.PortLabel)
   728  		if err != nil {
   729  			return fmt.Errorf("error parsing port label %q from service %q: %v", service.PortLabel, service.Name, err)
   730  		}
   731  		port, err := strconv.Atoi(rawport)
   732  		if err != nil {
   733  			return fmt.Errorf("error parsing port %q from service %q: %v", rawport, service.Name, err)
   734  		}
   735  		serviceReg := &api.AgentServiceRegistration{
   736  			ID:      id,
   737  			Name:    service.Name,
   738  			Tags:    service.Tags,
   739  			Address: host,
   740  			Port:    port,
   741  			// This enables the consul UI to show that Nomad registered this service
   742  			Meta: map[string]string{
   743  				"external-source": "nomad",
   744  			},
   745  		}
   746  		ops.regServices = append(ops.regServices, serviceReg)
   747  
   748  		for _, check := range service.Checks {
   749  			checkID := MakeCheckID(id, check)
   750  			if check.Type == structs.ServiceCheckScript {
   751  				return fmt.Errorf("service %q contains invalid check: agent checks do not support scripts", service.Name)
   752  			}
   753  			checkHost, checkPort := serviceReg.Address, serviceReg.Port
   754  			if check.PortLabel != "" {
   755  				// Unlike tasks, agents don't use port labels. Agent ports are
   756  				// stored directly in the PortLabel.
   757  				host, rawport, err := net.SplitHostPort(check.PortLabel)
   758  				if err != nil {
   759  					return fmt.Errorf("error parsing port label %q from check %q: %v", service.PortLabel, check.Name, err)
   760  				}
   761  				port, err := strconv.Atoi(rawport)
   762  				if err != nil {
   763  					return fmt.Errorf("error parsing port %q from check %q: %v", rawport, check.Name, err)
   764  				}
   765  				checkHost, checkPort = host, port
   766  			}
   767  			checkReg, err := createCheckReg(id, checkID, check, checkHost, checkPort)
   768  			if err != nil {
   769  				return fmt.Errorf("failed to add check %q: %v", check.Name, err)
   770  			}
   771  			ops.regChecks = append(ops.regChecks, checkReg)
   772  		}
   773  	}
   774  
   775  	// Don't bother committing agent checks if we're already shutting down
   776  	c.agentLock.Lock()
   777  	defer c.agentLock.Unlock()
   778  	select {
   779  	case <-c.shutdownCh:
   780  		return nil
   781  	default:
   782  	}
   783  
   784  	// Now add them to the registration queue
   785  	c.commit(&ops)
   786  
   787  	// Record IDs for deregistering on shutdown
   788  	for _, id := range ops.regServices {
   789  		c.agentServices[id.ID] = struct{}{}
   790  	}
   791  	for _, id := range ops.regChecks {
   792  		c.agentChecks[id.ID] = struct{}{}
   793  	}
   794  	return nil
   795  }
   796  
   797  // serviceRegs creates service registrations, check registrations, and script
   798  // checks from a service. It returns a service registration object with the
   799  // service and check IDs populated.
   800  func (c *ServiceClient) serviceRegs(ops *operations, service *structs.Service, workload *WorkloadServices) (
   801  	*ServiceRegistration, error) {
   802  
   803  	// Get the services ID
   804  	id := MakeAllocServiceID(workload.AllocID, workload.Name(), service)
   805  	sreg := &ServiceRegistration{
   806  		serviceID: id,
   807  		checkIDs:  make(map[string]struct{}, len(service.Checks)),
   808  	}
   809  
   810  	// Service address modes default to auto
   811  	addrMode := service.AddressMode
   812  	if addrMode == "" {
   813  		addrMode = structs.AddressModeAuto
   814  	}
   815  
   816  	// Determine the address to advertise based on the mode
   817  	ip, port, err := getAddress(addrMode, service.PortLabel, workload.Networks, workload.DriverNetwork)
   818  	if err != nil {
   819  		return nil, fmt.Errorf("unable to get address for service %q: %v", service.Name, err)
   820  	}
   821  
   822  	// Determine whether to use tags or canary_tags
   823  	var tags []string
   824  	if workload.Canary && len(service.CanaryTags) > 0 {
   825  		tags = make([]string, len(service.CanaryTags))
   826  		copy(tags, service.CanaryTags)
   827  	} else {
   828  		tags = make([]string, len(service.Tags))
   829  		copy(tags, service.Tags)
   830  	}
   831  
   832  	// newConnect returns (nil, nil) if there's no Connect-enabled service.
   833  	connect, err := newConnect(service.Name, service.Connect, workload.Networks)
   834  	if err != nil {
   835  		return nil, fmt.Errorf("invalid Consul Connect configuration for service %q: %v", service.Name, err)
   836  	}
   837  
   838  	// Determine whether to use meta or canary_meta
   839  	var meta map[string]string
   840  	if workload.Canary && len(service.CanaryMeta) > 0 {
   841  		meta = make(map[string]string, len(service.CanaryMeta)+1)
   842  		for k, v := range service.CanaryMeta {
   843  			meta[k] = v
   844  		}
   845  	} else {
   846  		meta = make(map[string]string, len(service.Meta)+1)
   847  		for k, v := range service.Meta {
   848  			meta[k] = v
   849  		}
   850  	}
   851  
   852  	// This enables the consul UI to show that Nomad registered this service
   853  	meta["external-source"] = "nomad"
   854  
   855  	// Build the Consul Service registration request
   856  	serviceReg := &api.AgentServiceRegistration{
   857  		ID:                id,
   858  		Name:              service.Name,
   859  		Tags:              tags,
   860  		EnableTagOverride: service.EnableTagOverride,
   861  		Address:           ip,
   862  		Port:              port,
   863  		Meta:              meta,
   864  		Connect:           connect, // will be nil if no Connect stanza
   865  	}
   866  	ops.regServices = append(ops.regServices, serviceReg)
   867  
   868  	// Build the check registrations
   869  	checkIDs, err := c.checkRegs(ops, id, service, workload)
   870  	if err != nil {
   871  		return nil, err
   872  	}
   873  	for _, cid := range checkIDs {
   874  		sreg.checkIDs[cid] = struct{}{}
   875  	}
   876  	return sreg, nil
   877  }
   878  
   879  // checkRegs registers the checks for the given service and returns the
   880  // registered check ids.
   881  func (c *ServiceClient) checkRegs(ops *operations, serviceID string, service *structs.Service,
   882  	workload *WorkloadServices) ([]string, error) {
   883  
   884  	// Fast path
   885  	numChecks := len(service.Checks)
   886  	if numChecks == 0 {
   887  		return nil, nil
   888  	}
   889  
   890  	checkIDs := make([]string, 0, numChecks)
   891  	for _, check := range service.Checks {
   892  		checkID := MakeCheckID(serviceID, check)
   893  		checkIDs = append(checkIDs, checkID)
   894  		if check.Type == structs.ServiceCheckScript {
   895  			// Skip getAddress for script checks
   896  			checkReg, err := createCheckReg(serviceID, checkID, check, "", 0)
   897  			if err != nil {
   898  				return nil, fmt.Errorf("failed to add script check %q: %v", check.Name, err)
   899  			}
   900  			ops.regChecks = append(ops.regChecks, checkReg)
   901  			continue
   902  		}
   903  
   904  		// Default to the service's port but allow check to override
   905  		portLabel := check.PortLabel
   906  		if portLabel == "" {
   907  			// Default to the service's port label
   908  			portLabel = service.PortLabel
   909  		}
   910  
   911  		// Checks address mode defaults to host for pre-#3380 backward compat
   912  		addrMode := check.AddressMode
   913  		if addrMode == "" {
   914  			addrMode = structs.AddressModeHost
   915  		}
   916  
   917  		ip, port, err := getAddress(addrMode, portLabel, workload.Networks, workload.DriverNetwork)
   918  		if err != nil {
   919  			return nil, fmt.Errorf("error getting address for check %q: %v", check.Name, err)
   920  		}
   921  
   922  		checkReg, err := createCheckReg(serviceID, checkID, check, ip, port)
   923  		if err != nil {
   924  			return nil, fmt.Errorf("failed to add check %q: %v", check.Name, err)
   925  		}
   926  		ops.regChecks = append(ops.regChecks, checkReg)
   927  	}
   928  	return checkIDs, nil
   929  }
   930  
   931  // RegisterWorkload with Consul. Adds all service entries and checks to Consul.
   932  //
   933  // If the service IP is set it used as the address in the service registration.
   934  // Checks will always use the IP from the Task struct (host's IP).
   935  //
   936  // Actual communication with Consul is done asynchronously (see Run).
   937  func (c *ServiceClient) RegisterWorkload(workload *WorkloadServices) error {
   938  	// Fast path
   939  	numServices := len(workload.Services)
   940  	if numServices == 0 {
   941  		return nil
   942  	}
   943  
   944  	t := new(ServiceRegistrations)
   945  	t.Services = make(map[string]*ServiceRegistration, numServices)
   946  
   947  	ops := &operations{}
   948  	for _, service := range workload.Services {
   949  		sreg, err := c.serviceRegs(ops, service, workload)
   950  		if err != nil {
   951  			return err
   952  		}
   953  		t.Services[sreg.serviceID] = sreg
   954  	}
   955  
   956  	// Add the workload to the allocation's registration
   957  	c.addRegistrations(workload.AllocID, workload.Name(), t)
   958  
   959  	c.commit(ops)
   960  
   961  	// Start watching checks. Done after service registrations are built
   962  	// since an error building them could leak watches.
   963  	for _, service := range workload.Services {
   964  		serviceID := MakeAllocServiceID(workload.AllocID, workload.Name(), service)
   965  		for _, check := range service.Checks {
   966  			if check.TriggersRestarts() {
   967  				checkID := MakeCheckID(serviceID, check)
   968  				c.checkWatcher.Watch(workload.AllocID, workload.Name(), checkID, check, workload.Restarter)
   969  			}
   970  		}
   971  	}
   972  	return nil
   973  }
   974  
   975  // UpdateWorkload in Consul. Does not alter the service if only checks have
   976  // changed.
   977  //
   978  // DriverNetwork must not change between invocations for the same allocation.
   979  func (c *ServiceClient) UpdateWorkload(old, newWorkload *WorkloadServices) error {
   980  	ops := new(operations)
   981  	regs := new(ServiceRegistrations)
   982  	regs.Services = make(map[string]*ServiceRegistration, len(newWorkload.Services))
   983  
   984  	existingIDs := make(map[string]*structs.Service, len(old.Services))
   985  	for _, s := range old.Services {
   986  		existingIDs[MakeAllocServiceID(old.AllocID, old.Name(), s)] = s
   987  	}
   988  	newIDs := make(map[string]*structs.Service, len(newWorkload.Services))
   989  	for _, s := range newWorkload.Services {
   990  		newIDs[MakeAllocServiceID(newWorkload.AllocID, newWorkload.Name(), s)] = s
   991  	}
   992  
   993  	// Loop over existing Service IDs to see if they have been removed
   994  	for existingID, existingSvc := range existingIDs {
   995  		newSvc, ok := newIDs[existingID]
   996  
   997  		if !ok {
   998  			// Existing service entry removed
   999  			ops.deregServices = append(ops.deregServices, existingID)
  1000  			for _, check := range existingSvc.Checks {
  1001  				cid := MakeCheckID(existingID, check)
  1002  				ops.deregChecks = append(ops.deregChecks, cid)
  1003  
  1004  				// Unwatch watched checks
  1005  				if check.TriggersRestarts() {
  1006  					c.checkWatcher.Unwatch(cid)
  1007  				}
  1008  			}
  1009  			continue
  1010  		}
  1011  
  1012  		oldHash := existingSvc.Hash(old.AllocID, old.Name(), old.Canary)
  1013  		newHash := newSvc.Hash(newWorkload.AllocID, newWorkload.Name(), newWorkload.Canary)
  1014  		if oldHash == newHash {
  1015  			// Service exists and hasn't changed, don't re-add it later
  1016  			delete(newIDs, existingID)
  1017  		}
  1018  
  1019  		// Service still exists so add it to the task's registration
  1020  		sreg := &ServiceRegistration{
  1021  			serviceID: existingID,
  1022  			checkIDs:  make(map[string]struct{}, len(newSvc.Checks)),
  1023  		}
  1024  		regs.Services[existingID] = sreg
  1025  
  1026  		// See if any checks were updated
  1027  		existingChecks := make(map[string]*structs.ServiceCheck, len(existingSvc.Checks))
  1028  		for _, check := range existingSvc.Checks {
  1029  			existingChecks[MakeCheckID(existingID, check)] = check
  1030  		}
  1031  
  1032  		// Register new checks
  1033  		for _, check := range newSvc.Checks {
  1034  			checkID := MakeCheckID(existingID, check)
  1035  			if _, exists := existingChecks[checkID]; exists {
  1036  				// Check is still required. Remove it from the map so it doesn't get
  1037  				// deleted later.
  1038  				delete(existingChecks, checkID)
  1039  				sreg.checkIDs[checkID] = struct{}{}
  1040  			}
  1041  
  1042  			// New check on an unchanged service; add them now
  1043  			newCheckIDs, err := c.checkRegs(ops, existingID, newSvc, newWorkload)
  1044  			if err != nil {
  1045  				return err
  1046  			}
  1047  
  1048  			for _, checkID := range newCheckIDs {
  1049  				sreg.checkIDs[checkID] = struct{}{}
  1050  			}
  1051  
  1052  			// Update all watched checks as CheckRestart fields aren't part of ID
  1053  			if check.TriggersRestarts() {
  1054  				c.checkWatcher.Watch(newWorkload.AllocID, newWorkload.Name(), checkID, check, newWorkload.Restarter)
  1055  			}
  1056  		}
  1057  
  1058  		// Remove existing checks not in updated service
  1059  		for cid, check := range existingChecks {
  1060  			ops.deregChecks = append(ops.deregChecks, cid)
  1061  
  1062  			// Unwatch checks
  1063  			if check.TriggersRestarts() {
  1064  				c.checkWatcher.Unwatch(cid)
  1065  			}
  1066  		}
  1067  	}
  1068  
  1069  	// Any remaining services should just be enqueued directly
  1070  	for _, newSvc := range newIDs {
  1071  		sreg, err := c.serviceRegs(ops, newSvc, newWorkload)
  1072  		if err != nil {
  1073  			return err
  1074  		}
  1075  
  1076  		regs.Services[sreg.serviceID] = sreg
  1077  	}
  1078  
  1079  	// Add the task to the allocation's registration
  1080  	c.addRegistrations(newWorkload.AllocID, newWorkload.Name(), regs)
  1081  
  1082  	c.commit(ops)
  1083  
  1084  	// Start watching checks. Done after service registrations are built
  1085  	// since an error building them could leak watches.
  1086  	for _, service := range newIDs {
  1087  		serviceID := MakeAllocServiceID(newWorkload.AllocID, newWorkload.Name(), service)
  1088  		for _, check := range service.Checks {
  1089  			if check.TriggersRestarts() {
  1090  				checkID := MakeCheckID(serviceID, check)
  1091  				c.checkWatcher.Watch(newWorkload.AllocID, newWorkload.Name(), checkID, check, newWorkload.Restarter)
  1092  			}
  1093  		}
  1094  	}
  1095  
  1096  	return nil
  1097  }
  1098  
  1099  // RemoveWorkload from Consul. Removes all service entries and checks.
  1100  //
  1101  // Actual communication with Consul is done asynchronously (see Run).
  1102  func (c *ServiceClient) RemoveWorkload(workload *WorkloadServices) {
  1103  	ops := operations{}
  1104  
  1105  	for _, service := range workload.Services {
  1106  		id := MakeAllocServiceID(workload.AllocID, workload.Name(), service)
  1107  		ops.deregServices = append(ops.deregServices, id)
  1108  
  1109  		for _, check := range service.Checks {
  1110  			cid := MakeCheckID(id, check)
  1111  			ops.deregChecks = append(ops.deregChecks, cid)
  1112  
  1113  			if check.TriggersRestarts() {
  1114  				c.checkWatcher.Unwatch(cid)
  1115  			}
  1116  		}
  1117  	}
  1118  
  1119  	// Remove the workload from the alloc's registrations
  1120  	c.removeRegistration(workload.AllocID, workload.Name())
  1121  
  1122  	// Now add them to the deregistration fields; main Run loop will update
  1123  	c.commit(&ops)
  1124  }
  1125  
  1126  // AllocRegistrations returns the registrations for the given allocation. If the
  1127  // allocation has no reservations, the response is a nil object.
  1128  func (c *ServiceClient) AllocRegistrations(allocID string) (*AllocRegistration, error) {
  1129  	// Get the internal struct using the lock
  1130  	c.allocRegistrationsLock.RLock()
  1131  	regInternal, ok := c.allocRegistrations[allocID]
  1132  	if !ok {
  1133  		c.allocRegistrationsLock.RUnlock()
  1134  		return nil, nil
  1135  	}
  1136  
  1137  	// Copy so we don't expose internal structs
  1138  	reg := regInternal.copy()
  1139  	c.allocRegistrationsLock.RUnlock()
  1140  
  1141  	// Query the services and checks to populate the allocation registrations.
  1142  	services, err := c.client.Services()
  1143  	if err != nil {
  1144  		return nil, err
  1145  	}
  1146  
  1147  	checks, err := c.client.Checks()
  1148  	if err != nil {
  1149  		return nil, err
  1150  	}
  1151  
  1152  	// Populate the object
  1153  	for _, treg := range reg.Tasks {
  1154  		for serviceID, sreg := range treg.Services {
  1155  			sreg.Service = services[serviceID]
  1156  			for checkID := range sreg.checkIDs {
  1157  				if check, ok := checks[checkID]; ok {
  1158  					sreg.Checks = append(sreg.Checks, check)
  1159  				}
  1160  			}
  1161  		}
  1162  	}
  1163  
  1164  	return reg, nil
  1165  }
  1166  
// UpdateTTL is used to update the TTL of a check. Typically this will only be
// called to heartbeat script checks.
//
// The call is forwarded unchanged to the underlying Consul client: id is the
// check ID, and output and status are passed through as given. The client's
// error, if any, is returned as-is.
func (c *ServiceClient) UpdateTTL(id, output, status string) error {
	return c.client.UpdateTTL(id, output, status)
}
  1172  
  1173  // Shutdown the Consul client. Update running task registrations and deregister
  1174  // agent from Consul. On first call blocks up to shutdownWait before giving up
  1175  // on syncing operations.
  1176  func (c *ServiceClient) Shutdown() error {
  1177  	// Serialize Shutdown calls with RegisterAgent to prevent leaking agent
  1178  	// entries.
  1179  	c.agentLock.Lock()
  1180  	defer c.agentLock.Unlock()
  1181  	select {
  1182  	case <-c.shutdownCh:
  1183  		return nil
  1184  	default:
  1185  		close(c.shutdownCh)
  1186  	}
  1187  
  1188  	// Give run loop time to sync, but don't block indefinitely
  1189  	deadline := time.After(c.shutdownWait)
  1190  
  1191  	// Wait for Run to finish any outstanding operations and exit
  1192  	select {
  1193  	case <-c.exitCh:
  1194  	case <-deadline:
  1195  		// Don't wait forever though
  1196  	}
  1197  
  1198  	// If Consul was never seen nothing could be written so exit early
  1199  	if !c.hasSeen() {
  1200  		return nil
  1201  	}
  1202  
  1203  	// Always attempt to deregister Nomad agent Consul entries, even if
  1204  	// deadline was reached
  1205  	for id := range c.agentServices {
  1206  		if err := c.client.ServiceDeregister(id); err != nil {
  1207  			c.logger.Error("failed deregistering agent service", "service_id", id, "error", err)
  1208  		}
  1209  	}
  1210  	for id := range c.agentChecks {
  1211  		if err := c.client.CheckDeregister(id); err != nil {
  1212  			c.logger.Error("failed deregistering agent check", "check_id", id, "error", err)
  1213  		}
  1214  	}
  1215  
  1216  	return nil
  1217  }
  1218  
  1219  // addRegistration adds the service registrations for the given allocation.
  1220  func (c *ServiceClient) addRegistrations(allocID, taskName string, reg *ServiceRegistrations) {
  1221  	c.allocRegistrationsLock.Lock()
  1222  	defer c.allocRegistrationsLock.Unlock()
  1223  
  1224  	alloc, ok := c.allocRegistrations[allocID]
  1225  	if !ok {
  1226  		alloc = &AllocRegistration{
  1227  			Tasks: make(map[string]*ServiceRegistrations),
  1228  		}
  1229  		c.allocRegistrations[allocID] = alloc
  1230  	}
  1231  	alloc.Tasks[taskName] = reg
  1232  }
  1233  
  1234  // removeRegistrations removes the registration for the given allocation.
  1235  func (c *ServiceClient) removeRegistration(allocID, taskName string) {
  1236  	c.allocRegistrationsLock.Lock()
  1237  	defer c.allocRegistrationsLock.Unlock()
  1238  
  1239  	alloc, ok := c.allocRegistrations[allocID]
  1240  	if !ok {
  1241  		return
  1242  	}
  1243  
  1244  	// Delete the task and if it is the last one also delete the alloc's
  1245  	// registration
  1246  	delete(alloc.Tasks, taskName)
  1247  	if len(alloc.Tasks) == 0 {
  1248  		delete(c.allocRegistrations, allocID)
  1249  	}
  1250  }
  1251  
// makeAgentServiceID creates a unique ID for identifying an agent service in
// Consul.
//
// The ID is nomadServicePrefix, the agent role, and the value returned by
// service.Hash(role, "", false), joined with dashes:
//
//	{nomadServicePrefix}-{ROLE}-{service hash}
//	Example Server ID: _nomad-server-fbbk265qn4tmt25nd4ep42tjvmyj3hr4
//	Example Client ID: _nomad-client-ggnjpgl7yn7rgmvxzilmpvrzzvrszc7l
//
// NOTE(review): earlier documentation described the hash as
// b32(sha1({Service.Name}-{Service.Tags...})); confirm against Service.Hash.
func makeAgentServiceID(role string, service *structs.Service) string {
	return fmt.Sprintf("%s-%s-%s", nomadServicePrefix, role, service.Hash(role, "", false))
}
  1264  
  1265  // MakeAllocServiceID creates a unique ID for identifying an alloc service in
  1266  // Consul.
  1267  //
  1268  //	Example Service ID: _nomad-task-b4e61df9-b095-d64e-f241-23860da1375f-redis-http-http
  1269  func MakeAllocServiceID(allocID, taskName string, service *structs.Service) string {
  1270  	return fmt.Sprintf("%s%s-%s-%s-%s", nomadTaskPrefix, allocID, taskName, service.Name, service.PortLabel)
  1271  }
  1272  
// MakeCheckID creates a unique ID for a check: nomadCheckPrefix followed by
// the value of check.Hash computed with the ID of the service the check
// belongs to.
//
//	Example Check ID: _nomad-check-434ae42f9a57c5705344974ac38de2aee0ee089d
func MakeCheckID(serviceID string, check *structs.ServiceCheck) string {
	return fmt.Sprintf("%s%s", nomadCheckPrefix, check.Hash(serviceID))
}
  1279  
  1280  // createCheckReg creates a Check that can be registered with Consul.
  1281  //
  1282  // Script checks simply have a TTL set and the caller is responsible for
  1283  // running the script and heart-beating.
  1284  func createCheckReg(serviceID, checkID string, check *structs.ServiceCheck, host string, port int) (*api.AgentCheckRegistration, error) {
  1285  	chkReg := api.AgentCheckRegistration{
  1286  		ID:        checkID,
  1287  		Name:      check.Name,
  1288  		ServiceID: serviceID,
  1289  	}
  1290  	chkReg.Status = check.InitialStatus
  1291  	chkReg.Timeout = check.Timeout.String()
  1292  	chkReg.Interval = check.Interval.String()
  1293  
  1294  	// Require an address for http or tcp checks
  1295  	if port == 0 && check.RequiresPort() {
  1296  		return nil, fmt.Errorf("%s checks require an address", check.Type)
  1297  	}
  1298  
  1299  	switch check.Type {
  1300  	case structs.ServiceCheckHTTP:
  1301  		proto := check.Protocol
  1302  		if proto == "" {
  1303  			proto = "http"
  1304  		}
  1305  		if check.TLSSkipVerify {
  1306  			chkReg.TLSSkipVerify = true
  1307  		}
  1308  		base := url.URL{
  1309  			Scheme: proto,
  1310  			Host:   net.JoinHostPort(host, strconv.Itoa(port)),
  1311  		}
  1312  		relative, err := url.Parse(check.Path)
  1313  		if err != nil {
  1314  			return nil, err
  1315  		}
  1316  		checkURL := base.ResolveReference(relative)
  1317  		chkReg.HTTP = checkURL.String()
  1318  		chkReg.Method = check.Method
  1319  		chkReg.Header = check.Header
  1320  
  1321  	case structs.ServiceCheckTCP:
  1322  		chkReg.TCP = net.JoinHostPort(host, strconv.Itoa(port))
  1323  
  1324  	case structs.ServiceCheckScript:
  1325  		chkReg.TTL = (check.Interval + ttlCheckBuffer).String()
  1326  		// As of Consul 1.0.0 setting TTL and Interval is a 400
  1327  		chkReg.Interval = ""
  1328  
  1329  	case structs.ServiceCheckGRPC:
  1330  		chkReg.GRPC = fmt.Sprintf("%s/%s", net.JoinHostPort(host, strconv.Itoa(port)), check.GRPCService)
  1331  		chkReg.GRPCUseTLS = check.GRPCUseTLS
  1332  		if check.TLSSkipVerify {
  1333  			chkReg.TLSSkipVerify = true
  1334  		}
  1335  
  1336  	default:
  1337  		return nil, fmt.Errorf("check type %+q not valid", check.Type)
  1338  	}
  1339  	return &chkReg, nil
  1340  }
  1341  
// isNomadCheck returns true if the ID matches the pattern of a Nomad managed
// check, i.e. it starts with nomadCheckPrefix (the prefix MakeCheckID puts on
// every check ID it creates).
func isNomadCheck(id string) bool {
	return strings.HasPrefix(id, nomadCheckPrefix)
}
  1347  
  1348  // isNomadService returns true if the ID matches the pattern of a Nomad managed
  1349  // service (new or old formats). Agent services return false as independent
  1350  // client and server agents may be running on the same machine. #2827
  1351  func isNomadService(id string) bool {
  1352  	return strings.HasPrefix(id, nomadTaskPrefix) || isOldNomadService(id)
  1353  }
  1354  
  1355  // isOldNomadService returns true if the ID matches an old pattern managed by
  1356  // Nomad.
  1357  //
  1358  // Pre-0.7.1 task service IDs are of the form:
  1359  //
  1360  //	{nomadServicePrefix}-executor-{ALLOC_ID}-{Service.Name}-{Service.Tags...}
  1361  //	Example Service ID: _nomad-executor-1234-echo-http-tag1-tag2-tag3
  1362  //
  1363  func isOldNomadService(id string) bool {
  1364  	const prefix = nomadServicePrefix + "-executor"
  1365  	return strings.HasPrefix(id, prefix)
  1366  }
  1367  
const (
	// sidecarSuffix is the suffix that the ID of a Connect sidecar proxy
	// service carries on top of the ID of the service it proxies for.
	sidecarSuffix = "-sidecar-proxy"
)
  1371  
  1372  // isNomadSidecar returns true if the ID matches a sidecar proxy for a Nomad
  1373  // managed service.
  1374  //
  1375  // For example if you have a Connect enabled service with the ID:
  1376  //
  1377  //	_nomad-task-5229c7f8-376b-3ccc-edd9-981e238f7033-cache-redis-cache-db
  1378  //
  1379  // Consul will create a service for the sidecar proxy with the ID:
  1380  //
  1381  //	_nomad-task-5229c7f8-376b-3ccc-edd9-981e238f7033-cache-redis-cache-db-sidecar-proxy
  1382  //
  1383  func isNomadSidecar(id string, services map[string]*api.AgentServiceRegistration) bool {
  1384  	if !strings.HasSuffix(id, sidecarSuffix) {
  1385  		return false
  1386  	}
  1387  
  1388  	// Make sure the Nomad managed service for this proxy still exists.
  1389  	_, ok := services[id[:len(id)-len(sidecarSuffix)]]
  1390  	return ok
  1391  }
  1392  
  1393  // getNomadSidecar returns the service registration of the sidecar for the managed
  1394  // service with the specified id.
  1395  //
  1396  // If the managed service of the specified id does not exist, or the service does
  1397  // not have a sidecar proxy, nil is returned.
  1398  func getNomadSidecar(id string, services map[string]*api.AgentService) *api.AgentService {
  1399  	if _, exists := services[id]; !exists {
  1400  		return nil
  1401  	}
  1402  
  1403  	sidecarID := id + sidecarSuffix
  1404  	return services[sidecarID]
  1405  }
  1406  
  1407  // getAddress returns the IP and port to use for a service or check. If no port
  1408  // label is specified (an empty value), zero values are returned because no
  1409  // address could be resolved.
  1410  func getAddress(addrMode, portLabel string, networks structs.Networks, driverNet *drivers.DriverNetwork) (string, int, error) {
  1411  	switch addrMode {
  1412  	case structs.AddressModeAuto:
  1413  		if driverNet.Advertise() {
  1414  			addrMode = structs.AddressModeDriver
  1415  		} else {
  1416  			addrMode = structs.AddressModeHost
  1417  		}
  1418  		return getAddress(addrMode, portLabel, networks, driverNet)
  1419  	case structs.AddressModeHost:
  1420  		if portLabel == "" {
  1421  			if len(networks) != 1 {
  1422  				// If no networks are specified return zero
  1423  				// values. Consul will advertise the host IP
  1424  				// with no port. This is the pre-0.7.1 behavior
  1425  				// some people rely on.
  1426  				return "", 0, nil
  1427  			}
  1428  
  1429  			return networks[0].IP, 0, nil
  1430  		}
  1431  
  1432  		// Default path: use host ip:port
  1433  		ip, port := networks.Port(portLabel)
  1434  		if ip == "" && port <= 0 {
  1435  			return "", 0, fmt.Errorf("invalid port %q: port label not found", portLabel)
  1436  		}
  1437  		return ip, port, nil
  1438  
  1439  	case structs.AddressModeDriver:
  1440  		// Require a driver network if driver address mode is used
  1441  		if driverNet == nil {
  1442  			return "", 0, fmt.Errorf(`cannot use address_mode="driver": no driver network exists`)
  1443  		}
  1444  
  1445  		// If no port label is specified just return the IP
  1446  		if portLabel == "" {
  1447  			return driverNet.IP, 0, nil
  1448  		}
  1449  
  1450  		// If the port is a label, use the driver's port (not the host's)
  1451  		if port, ok := driverNet.PortMap[portLabel]; ok {
  1452  			return driverNet.IP, port, nil
  1453  		}
  1454  
  1455  		// If port isn't a label, try to parse it as a literal port number
  1456  		port, err := strconv.Atoi(portLabel)
  1457  		if err != nil {
  1458  			// Don't include Atoi error message as user likely
  1459  			// never intended it to be a numeric and it creates a
  1460  			// confusing error message
  1461  			return "", 0, fmt.Errorf("invalid port label %q: port labels in driver address_mode must be numeric or in the driver's port map", portLabel)
  1462  		}
  1463  		if port <= 0 {
  1464  			return "", 0, fmt.Errorf("invalid port: %q: port must be >0", portLabel)
  1465  		}
  1466  
  1467  		return driverNet.IP, port, nil
  1468  
  1469  	default:
  1470  		// Shouldn't happen due to validation, but enforce invariants
  1471  		return "", 0, fmt.Errorf("invalid address mode %q", addrMode)
  1472  	}
  1473  }