github.com/superfly/nomad@v0.10.5-fly/command/agent/consul/client.go (about)

     1  package consul
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"net"
     7  	"net/url"
     8  	"reflect"
     9  	"strconv"
    10  	"strings"
    11  	"sync"
    12  	"sync/atomic"
    13  	"time"
    14  
    15  	metrics "github.com/armon/go-metrics"
    16  	log "github.com/hashicorp/go-hclog"
    17  
    18  	"github.com/hashicorp/consul/api"
    19  	"github.com/hashicorp/nomad/helper"
    20  	"github.com/hashicorp/nomad/nomad/structs"
    21  	"github.com/hashicorp/nomad/plugins/drivers"
    22  )
    23  
const (
	// nomadServicePrefix is the prefix that scopes all Nomad registered
	// services (both agent and task entries).
	nomadServicePrefix = "_nomad"

	// nomadTaskPrefix is the prefix that scopes Nomad registered services
	// for tasks.
	nomadTaskPrefix = nomadServicePrefix + "-task-"

	// nomadCheckPrefix is the prefix that scopes Nomad registered checks for
	// services.
	nomadCheckPrefix = nomadServicePrefix + "-check-"

	// defaultRetryInterval is how quickly to retry syncing services and
	// checks to Consul when an error occurs. The retry delay grows with each
	// consecutive failure, up to defaultMaxRetryInterval.
	defaultRetryInterval = time.Second

	// defaultMaxRetryInterval is the default max retry interval.
	defaultMaxRetryInterval = 30 * time.Second

	// defaultPeriodicInterval is the interval at which the service
	// client reconciles state between the desired services and checks and
	// what's actually registered in Consul. This is done at an interval,
	// rather than being purely edge triggered, to handle the case that the
	// Consul agent's state may change underneath us.
	defaultPeriodicInterval = 30 * time.Second

	// ttlCheckBuffer is the time interval that Nomad can take to report the
	// check result to Consul.
	ttlCheckBuffer = 31 * time.Second

	// defaultShutdownWait is how long Shutdown() should block waiting for
	// enqueued operations to sync to Consul by default.
	defaultShutdownWait = time.Minute

	// DefaultQueryWaitDuration is the max duration the Consul Agent will
	// spend waiting for a response from a Consul Query.
	DefaultQueryWaitDuration = 2 * time.Second

	// ServiceTagHTTP is the tag assigned to HTTP services.
	ServiceTagHTTP = "http"

	// ServiceTagRPC is the tag assigned to RPC services.
	ServiceTagRPC = "rpc"

	// ServiceTagSerf is the tag assigned to Serf services.
	ServiceTagSerf = "serf"

	// deregisterProbationPeriod is the initialization period during which
	// services registered in Consul but not known to Nomad don't get
	// deregistered, to allow for Nomad restoring tasks.
	deregisterProbationPeriod = time.Minute
)
    77  
// CatalogAPI is the consul/api.Catalog API used by Nomad.
//
// It is declared as an interface so that only the subset of the Consul
// catalog client that Nomad needs is depended upon.
// NOTE(review): the method semantics are those of consul/api.Catalog; confirm
// against the Consul client documentation rather than this file.
type CatalogAPI interface {
	Datacenters() ([]string, error)
	Service(service, tag string, q *api.QueryOptions) ([]*api.CatalogService, *api.QueryMeta, error)
}
    83  
// AgentAPI is the consul/api.Agent API used by Nomad.
//
// ServiceClient.sync uses Services and Checks to read what is currently
// registered (both maps are looked up by service/check ID), and the
// Register/Deregister methods to converge Consul onto the desired state.
// Self and UpdateTTL are not called in the visible part of this file.
type AgentAPI interface {
	Services() (map[string]*api.AgentService, error)
	Checks() (map[string]*api.AgentCheck, error)
	CheckRegister(check *api.AgentCheckRegistration) error
	CheckDeregister(checkID string) error
	Self() (map[string]map[string]interface{}, error)
	ServiceRegister(service *api.AgentServiceRegistration) error
	ServiceDeregister(serviceID string) error
	UpdateTTL(id, output, status string) error
}
    95  
// ACLsAPI is the consul/api.ACL API subset used by Nomad Server.
//
// NOTE(review): none of these methods are called in the visible part of this
// file; their semantics are those of the consul/api ACL client.
type ACLsAPI interface {
	// We are looking up by [operator token] SecretID, which implies we need
	// to use this method instead of the normal TokenRead, which can only be
	// used to lookup tokens by their AccessorID.
	TokenReadSelf(q *api.QueryOptions) (*api.ACLToken, *api.QueryMeta, error)
	PolicyRead(policyID string, q *api.QueryOptions) (*api.ACLPolicy, *api.QueryMeta, error)
	RoleRead(roleID string, q *api.QueryOptions) (*api.ACLRole, *api.QueryMeta, error)
	TokenCreate(partial *api.ACLToken, q *api.WriteOptions) (*api.ACLToken, *api.WriteMeta, error)
	TokenDelete(accessorID string, q *api.WriteOptions) (*api.WriteMeta, error)
	TokenList(q *api.QueryOptions) ([]*api.ACLTokenListEntry, *api.QueryMeta, error)
}
   108  
   109  func agentServiceUpdateRequired(reg *api.AgentServiceRegistration, svc *api.AgentService) bool {
   110  	return !(reg.Kind == svc.Kind &&
   111  		reg.ID == svc.ID &&
   112  		reg.Port == svc.Port &&
   113  		reg.Address == svc.Address &&
   114  		reg.Name == svc.Service &&
   115  		reflect.DeepEqual(reg.Tags, svc.Tags) &&
   116  		reflect.DeepEqual(reg.Meta, svc.Meta))
   117  }
   118  
// operations are submitted to the main loop via commit() for synchronizing
// with Consul.
//
// regServices and regChecks hold registrations to add to the desired state;
// deregServices and deregChecks hold the IDs of services and checks to remove
// from it (see merge).
type operations struct {
	regServices   []*api.AgentServiceRegistration
	regChecks     []*api.AgentCheckRegistration
	deregServices []string
	deregChecks   []string
}
   127  
// AllocRegistration holds the status of the services registered for a
// particular allocation, grouped by task.
type AllocRegistration struct {
	// Tasks maps the name of a task to its registered services and checks
	Tasks map[string]*ServiceRegistrations
}
   134  
   135  func (a *AllocRegistration) copy() *AllocRegistration {
   136  	c := &AllocRegistration{
   137  		Tasks: make(map[string]*ServiceRegistrations, len(a.Tasks)),
   138  	}
   139  
   140  	for k, v := range a.Tasks {
   141  		c.Tasks[k] = v.copy()
   142  	}
   143  
   144  	return c
   145  }
   146  
   147  // NumServices returns the number of registered services
   148  func (a *AllocRegistration) NumServices() int {
   149  	if a == nil {
   150  		return 0
   151  	}
   152  
   153  	total := 0
   154  	for _, treg := range a.Tasks {
   155  		for _, sreg := range treg.Services {
   156  			if sreg.Service != nil {
   157  				total++
   158  			}
   159  		}
   160  	}
   161  
   162  	return total
   163  }
   164  
   165  // NumChecks returns the number of registered checks
   166  func (a *AllocRegistration) NumChecks() int {
   167  	if a == nil {
   168  		return 0
   169  	}
   170  
   171  	total := 0
   172  	for _, treg := range a.Tasks {
   173  		for _, sreg := range treg.Services {
   174  			total += len(sreg.Checks)
   175  		}
   176  	}
   177  
   178  	return total
   179  }
   180  
// ServiceRegistrations holds the status of services registered for a particular
// task or task group.
type ServiceRegistrations struct {
	// Services holds one ServiceRegistration per service; RegisterWorkload
	// keys the map by the registration's serviceID.
	Services map[string]*ServiceRegistration
}
   186  
   187  func (t *ServiceRegistrations) copy() *ServiceRegistrations {
   188  	c := &ServiceRegistrations{
   189  		Services: make(map[string]*ServiceRegistration, len(t.Services)),
   190  	}
   191  
   192  	for k, v := range t.Services {
   193  		c.Services[k] = v.copy()
   194  	}
   195  
   196  	return c
   197  }
   198  
// ServiceRegistration holds the status of a registered Consul Service and its
// Checks.
type ServiceRegistration struct {
	// serviceID and checkIDs are internal fields that track just the IDs of the
	// services/checks registered in Consul. They are used to materialize the
	// other fields when queried.
	serviceID string
	checkIDs  map[string]struct{}

	// Service is the AgentService registered in Consul.
	Service *api.AgentService

	// Checks is the status of the registered checks.
	Checks []*api.AgentCheck
}
   214  
   215  func (s *ServiceRegistration) copy() *ServiceRegistration {
   216  	// Copy does not copy the external fields but only the internal fields. This
   217  	// is so that the caller of AllocRegistrations can not access the internal
   218  	// fields and that method uses these fields to populate the external fields.
   219  	return &ServiceRegistration{
   220  		serviceID: s.serviceID,
   221  		checkIDs:  helper.CopyMapStringStruct(s.checkIDs),
   222  	}
   223  }
   224  
// ServiceClient handles task and agent service registration with Consul.
//
// Desired state is accumulated in services/checks by merge() and pushed to
// Consul by sync(); both are driven from the Run loop.
type ServiceClient struct {
	// client is the Consul agent API used to query and mutate registrations.
	// retryInterval and maxRetryInterval bound the delay between sync
	// retries after a failure; periodicInterval is the delay between
	// reconciliations after a successful sync.
	client           AgentAPI
	logger           log.Logger
	retryInterval    time.Duration
	maxRetryInterval time.Duration
	periodicInterval time.Duration

	// exitCh is closed when the main Run loop exits
	exitCh chan struct{}

	// shutdownCh is closed when the client should shutdown
	shutdownCh chan struct{}

	// shutdownWait is how long Shutdown() blocks waiting for the final
	// sync() to finish. Defaults to defaultShutdownWait
	shutdownWait time.Duration

	// opCh carries operations submitted through commit() to the Run loop.
	opCh chan *operations

	// services and checks are the registrations that should exist in
	// Consul, keyed by service ID and check ID respectively.
	services map[string]*api.AgentServiceRegistration
	checks   map[string]*api.AgentCheckRegistration

	// explicitlyDeregisteredServices and explicitlyDeregisteredChecks record
	// IDs removed through merge() since the last successful sync; sync()
	// deregisters these even during the deregistration probation period.
	explicitlyDeregisteredServices map[string]bool
	explicitlyDeregisteredChecks   map[string]bool

	// allocRegistrations stores the services and checks that are registered
	// with Consul by allocation ID.
	allocRegistrations     map[string]*AllocRegistration
	allocRegistrationsLock sync.RWMutex

	// agent services and checks record entries for the agent itself which
	// should be removed on shutdown
	agentServices map[string]struct{}
	agentChecks   map[string]struct{}
	agentLock     sync.Mutex

	// seen is 1 if Consul has ever been seen; otherwise 0. Accessed with
	// atomics.
	seen int32

	// deregisterProbationExpiry is the time before which consul sync shouldn't deregister
	// unknown services.
	// Used to mitigate risk of deleting restored services upon client restart.
	deregisterProbationExpiry time.Time

	// checkWatcher restarts checks that are unhealthy.
	checkWatcher *checkWatcher

	// isClientAgent specifies whether this Consul client is being used
	// by a Nomad client.
	isClientAgent bool
}
   278  
   279  // NewServiceClient creates a new Consul ServiceClient from an existing Consul API
   280  // Client, logger and takes whether the client is being used by a Nomad Client agent.
   281  // When being used by a Nomad client, this Consul client reconciles all services and
   282  // checks created by Nomad on behalf of running tasks.
   283  func NewServiceClient(consulClient AgentAPI, logger log.Logger, isNomadClient bool) *ServiceClient {
   284  	logger = logger.ResetNamed("consul.sync")
   285  	return &ServiceClient{
   286  		client:                         consulClient,
   287  		logger:                         logger,
   288  		retryInterval:                  defaultRetryInterval,
   289  		maxRetryInterval:               defaultMaxRetryInterval,
   290  		periodicInterval:               defaultPeriodicInterval,
   291  		exitCh:                         make(chan struct{}),
   292  		shutdownCh:                     make(chan struct{}),
   293  		shutdownWait:                   defaultShutdownWait,
   294  		opCh:                           make(chan *operations, 8),
   295  		services:                       make(map[string]*api.AgentServiceRegistration),
   296  		checks:                         make(map[string]*api.AgentCheckRegistration),
   297  		explicitlyDeregisteredServices: make(map[string]bool),
   298  		explicitlyDeregisteredChecks:   make(map[string]bool),
   299  		allocRegistrations:             make(map[string]*AllocRegistration),
   300  		agentServices:                  make(map[string]struct{}),
   301  		agentChecks:                    make(map[string]struct{}),
   302  		checkWatcher:                   newCheckWatcher(logger, consulClient),
   303  		isClientAgent:                  isNomadClient,
   304  		deregisterProbationExpiry:      time.Now().Add(deregisterProbationPeriod),
   305  	}
   306  }
   307  
// seen is the value stored in ServiceClient.seen by markSeen, and compared
// against by hasSeen, to record that Consul has been contacted.
const seen = 1
   310  
   311  // markSeen marks Consul as having been seen (meaning at least one operation
   312  // has succeeded).
   313  func (c *ServiceClient) markSeen() {
   314  	atomic.StoreInt32(&c.seen, seen)
   315  }
   316  
   317  // hasSeen returns true if any Consul operation has ever succeeded. Useful to
   318  // squelch errors if Consul isn't running.
   319  func (c *ServiceClient) hasSeen() bool {
   320  	return atomic.LoadInt32(&c.seen) == seen
   321  }
   322  
// Run the Consul main loop which retries operations against Consul. It should
// be called exactly once.
//
// The loop has two phases: first it only merges incoming operations while
// waiting for initial contact with Consul; afterwards it syncs on every
// wake-up (timer, new operations, or shutdown). exitCh is closed when Run
// returns.
func (c *ServiceClient) Run() {
	defer close(c.exitCh)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// init will be closed when Consul has been contacted
	init := make(chan struct{})
	go checkConsulTLSSkipVerify(ctx, c.logger, c.client, init)

	// Process operations while waiting for initial contact with Consul but
	// do not sync until contact has been made.
INIT:
	for {
		select {
		case <-init:
			c.markSeen()
			break INIT
		case <-c.shutdownCh:
			// Shut down before Consul was ever reachable: exit without syncing.
			return
		case ops := <-c.opCh:
			c.merge(ops)
		}
	}
	c.logger.Trace("able to contact Consul")

	// Contact with Consul has been established, so the checkWatcher can
	// be started.
	go c.checkWatcher.Run(ctx)

	// Always immediately sync to reconcile Nomad and Consul's state
	retryTimer := time.NewTimer(0)

	// failures counts consecutive failed syncs; it drives both log
	// throttling and the retry backoff.
	failures := 0
	for {
		// Wait for a reason to sync: the timer firing, shutdown, or new
		// operations being committed.
		select {
		case <-retryTimer.C:
		case <-c.shutdownCh:
			// Cancel check watcher but sync one last time
			cancel()
		case ops := <-c.opCh:
			c.merge(ops)
		}

		if err := c.sync(); err != nil {
			if failures == 0 {
				// Log on the first failure
				c.logger.Warn("failed to update services in Consul", "error", err)
			} else if failures%10 == 0 {
				// Log every 10th consecutive failure
				c.logger.Error("still unable to update services in Consul", "failures", failures, "error", err)
			}

			failures++
			if !retryTimer.Stop() {
				// Timer already expired, since the timer may
				// or may not have been read in the select{}
				// above, conditionally receive on it
				select {
				case <-retryTimer.C:
				default:
				}
			}
			// Backoff grows linearly with the number of consecutive
			// failures, capped at maxRetryInterval.
			backoff := c.retryInterval * time.Duration(failures)
			if backoff > c.maxRetryInterval {
				backoff = c.maxRetryInterval
			}
			retryTimer.Reset(backoff)
		} else {
			if failures > 0 {
				c.logger.Info("successfully updated services in Consul")
				failures = 0
			}

			// on successful sync, clear deregistered consul entities
			c.clearExplicitlyDeregistered()

			// Reset timer to periodic interval to periodically
			// reconcile with Consul
			if !retryTimer.Stop() {
				// Drain the channel if the timer already fired and was
				// not consumed by the select above.
				select {
				case <-retryTimer.C:
				default:
				}
			}
			retryTimer.Reset(c.periodicInterval)
		}

		select {
		case <-c.shutdownCh:
			// Exit only after sync'ing all outstanding operations
			if len(c.opCh) > 0 {
				for len(c.opCh) > 0 {
					c.merge(<-c.opCh)
				}
				// Loop again so the operations just merged get synced.
				continue
			}
			return
		default:
		}

	}
}
   428  
   429  // commit operations unless already shutting down.
   430  func (c *ServiceClient) commit(ops *operations) {
   431  	select {
   432  	case c.opCh <- ops:
   433  	case <-c.shutdownCh:
   434  	}
   435  }
   436  
   437  func (c *ServiceClient) clearExplicitlyDeregistered() {
   438  	c.explicitlyDeregisteredServices = map[string]bool{}
   439  	c.explicitlyDeregisteredChecks = map[string]bool{}
   440  }
   441  
   442  // merge registrations into state map prior to sync'ing with Consul
   443  func (c *ServiceClient) merge(ops *operations) {
   444  	for _, s := range ops.regServices {
   445  		c.services[s.ID] = s
   446  	}
   447  	for _, check := range ops.regChecks {
   448  		c.checks[check.ID] = check
   449  	}
   450  	for _, sid := range ops.deregServices {
   451  		delete(c.services, sid)
   452  		c.explicitlyDeregisteredServices[sid] = true
   453  	}
   454  	for _, cid := range ops.deregChecks {
   455  		delete(c.checks, cid)
   456  		c.explicitlyDeregisteredChecks[cid] = true
   457  	}
   458  	metrics.SetGauge([]string{"client", "consul", "services"}, float32(len(c.services)))
   459  	metrics.SetGauge([]string{"client", "consul", "checks"}, float32(len(c.checks)))
   460  }
   461  
// sync reconciles Consul with the desired state held in c.services and
// c.checks. In order, it:
//
//  1. deregisters Nomad-managed services found in Consul but unknown locally,
//  2. registers local services that are missing from Consul or have changed,
//  3. deregisters Nomad-managed checks found in Consul but unknown locally,
//  4. registers local checks that are missing from Consul.
//
// The first error aborts the sync and is returned; every failure increments
// the sync_failure counter. Deregistration failures for old-style Nomad
// service entries are ignored.
func (c *ServiceClient) sync() error {
	// Counters for the summary log line at the end.
	sreg, creg, sdereg, cdereg := 0, 0, 0, 0

	consulServices, err := c.client.Services()
	if err != nil {
		metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
		return fmt.Errorf("error querying Consul services: %v", err)
	}

	consulChecks, err := c.client.Checks()
	if err != nil {
		metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
		return fmt.Errorf("error querying Consul checks: %v", err)
	}

	// While in probation, unknown entries are only removed if they were
	// explicitly deregistered.
	inProbation := time.Now().Before(c.deregisterProbationExpiry)

	// Remove Nomad services in Consul but unknown locally
	for id := range consulServices {
		if _, ok := c.services[id]; ok {
			// Known service, skip
			continue
		}

		// Ignore if this is not a Nomad managed service. Also ignore
		// Nomad managed services if this is not a client agent.
		// This is to prevent server agents from removing services
		// registered by client agents
		if !isNomadService(id) || !c.isClientAgent {
			// Not managed by Nomad, skip
			continue
		}

		// Ignore unknown services during probation
		if inProbation && !c.explicitlyDeregisteredServices[id] {
			continue
		}

		// Ignore if this is a service for a Nomad managed sidecar proxy.
		if isNomadSidecar(id, c.services) {
			continue
		}

		// Unknown Nomad managed service; kill
		if err := c.client.ServiceDeregister(id); err != nil {
			if isOldNomadService(id) {
				// Don't hard-fail on old entries. See #3620
				continue
			}

			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		sdereg++
		metrics.IncrCounter([]string{"client", "consul", "service_deregistrations"}, 1)
	}

	// Add Nomad services missing from Consul, or where the service has been updated.
	for id, locals := range c.services {
		existingSvc, ok := consulServices[id]

		if ok {
			// There is an existing registration of this service in Consul, so
			// check whether it differs from the desired registration and
			// therefore needs to be updated.
			if !agentServiceUpdateRequired(locals, existingSvc) {
				// No Need to update services that have not changed
				continue
			}
		}

		if err = c.client.ServiceRegister(locals); err != nil {
			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		sreg++
		metrics.IncrCounter([]string{"client", "consul", "service_registrations"}, 1)
	}

	// Remove Nomad checks in Consul but unknown locally
	for id, check := range consulChecks {
		if _, ok := c.checks[id]; ok {
			// Known check, leave it
			continue
		}

		// Ignore if this is not a Nomad managed check. Also ignore
		// Nomad managed checks if this is not a client agent.
		// This is to prevent server agents from removing checks
		// registered by client agents
		if !isNomadService(check.ServiceID) || !c.isClientAgent || !isNomadCheck(check.CheckID) {
			// Service not managed by Nomad, skip
			continue
		}

		// Ignore unknown checks during probation
		if inProbation && !c.explicitlyDeregisteredChecks[id] {
			continue
		}

		// Ignore if this is a check for a Nomad managed sidecar proxy.
		if isNomadSidecar(check.ServiceID, c.services) {
			continue
		}

		// Unknown Nomad managed check; remove
		if err := c.client.CheckDeregister(id); err != nil {
			if isOldNomadService(check.ServiceID) {
				// Don't hard-fail on old entries.
				continue
			}

			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		cdereg++
		metrics.IncrCounter([]string{"client", "consul", "check_deregistrations"}, 1)
	}

	// Add Nomad checks missing from Consul. Checks already present in
	// Consul are not compared or updated here.
	for id, check := range c.checks {
		if _, ok := consulChecks[id]; ok {
			// Already in Consul; skipping
			continue
		}

		if err := c.client.CheckRegister(check); err != nil {
			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		creg++
		metrics.IncrCounter([]string{"client", "consul", "check_registrations"}, 1)
	}

	// Only log if something was actually synced
	if sreg > 0 || sdereg > 0 || creg > 0 || cdereg > 0 {
		c.logger.Debug("sync complete", "registered_services", sreg, "deregistered_services", sdereg,
			"registered_checks", creg, "deregistered_checks", cdereg)
	}
	return nil
}
   604  
   605  // RegisterAgent registers Nomad agents (client or server). The
   606  // Service.PortLabel should be a literal port to be parsed with SplitHostPort.
   607  // Script checks are not supported and will return an error. Registration is
   608  // asynchronous.
   609  //
   610  // Agents will be deregistered when Shutdown is called.
   611  func (c *ServiceClient) RegisterAgent(role string, services []*structs.Service) error {
   612  	ops := operations{}
   613  
   614  	for _, service := range services {
   615  		id := makeAgentServiceID(role, service)
   616  
   617  		// Unlike tasks, agents don't use port labels. Agent ports are
   618  		// stored directly in the PortLabel.
   619  		host, rawport, err := net.SplitHostPort(service.PortLabel)
   620  		if err != nil {
   621  			return fmt.Errorf("error parsing port label %q from service %q: %v", service.PortLabel, service.Name, err)
   622  		}
   623  		port, err := strconv.Atoi(rawport)
   624  		if err != nil {
   625  			return fmt.Errorf("error parsing port %q from service %q: %v", rawport, service.Name, err)
   626  		}
   627  		serviceReg := &api.AgentServiceRegistration{
   628  			ID:      id,
   629  			Name:    service.Name,
   630  			Tags:    service.Tags,
   631  			Address: host,
   632  			Port:    port,
   633  			// This enables the consul UI to show that Nomad registered this service
   634  			Meta: map[string]string{
   635  				"external-source": "nomad",
   636  			},
   637  		}
   638  		ops.regServices = append(ops.regServices, serviceReg)
   639  
   640  		for _, check := range service.Checks {
   641  			checkID := MakeCheckID(id, check)
   642  			if check.Type == structs.ServiceCheckScript {
   643  				return fmt.Errorf("service %q contains invalid check: agent checks do not support scripts", service.Name)
   644  			}
   645  			checkHost, checkPort := serviceReg.Address, serviceReg.Port
   646  			if check.PortLabel != "" {
   647  				// Unlike tasks, agents don't use port labels. Agent ports are
   648  				// stored directly in the PortLabel.
   649  				host, rawport, err := net.SplitHostPort(check.PortLabel)
   650  				if err != nil {
   651  					return fmt.Errorf("error parsing port label %q from check %q: %v", service.PortLabel, check.Name, err)
   652  				}
   653  				port, err := strconv.Atoi(rawport)
   654  				if err != nil {
   655  					return fmt.Errorf("error parsing port %q from check %q: %v", rawport, check.Name, err)
   656  				}
   657  				checkHost, checkPort = host, port
   658  			}
   659  			checkReg, err := createCheckReg(id, checkID, check, checkHost, checkPort)
   660  			if err != nil {
   661  				return fmt.Errorf("failed to add check %q: %v", check.Name, err)
   662  			}
   663  			ops.regChecks = append(ops.regChecks, checkReg)
   664  		}
   665  	}
   666  
   667  	// Don't bother committing agent checks if we're already shutting down
   668  	c.agentLock.Lock()
   669  	defer c.agentLock.Unlock()
   670  	select {
   671  	case <-c.shutdownCh:
   672  		return nil
   673  	default:
   674  	}
   675  
   676  	// Now add them to the registration queue
   677  	c.commit(&ops)
   678  
   679  	// Record IDs for deregistering on shutdown
   680  	for _, id := range ops.regServices {
   681  		c.agentServices[id.ID] = struct{}{}
   682  	}
   683  	for _, id := range ops.regChecks {
   684  		c.agentChecks[id.ID] = struct{}{}
   685  	}
   686  	return nil
   687  }
   688  
   689  // serviceRegs creates service registrations, check registrations, and script
   690  // checks from a service. It returns a service registration object with the
   691  // service and check IDs populated.
   692  func (c *ServiceClient) serviceRegs(ops *operations, service *structs.Service, workload *WorkloadServices) (
   693  	*ServiceRegistration, error) {
   694  
   695  	// Get the services ID
   696  	id := MakeAllocServiceID(workload.AllocID, workload.Name(), service)
   697  	sreg := &ServiceRegistration{
   698  		serviceID: id,
   699  		checkIDs:  make(map[string]struct{}, len(service.Checks)),
   700  	}
   701  
   702  	// Service address modes default to auto
   703  	addrMode := service.AddressMode
   704  	if addrMode == "" {
   705  		addrMode = structs.AddressModeAuto
   706  	}
   707  
   708  	// Determine the address to advertise based on the mode
   709  	ip, port, err := getAddress(addrMode, service.PortLabel, workload.Networks, workload.DriverNetwork)
   710  	if err != nil {
   711  		return nil, fmt.Errorf("unable to get address for service %q: %v", service.Name, err)
   712  	}
   713  
   714  	// Determine whether to use tags or canary_tags
   715  	var tags []string
   716  	if workload.Canary && len(service.CanaryTags) > 0 {
   717  		tags = make([]string, len(service.CanaryTags))
   718  		copy(tags, service.CanaryTags)
   719  	} else {
   720  		tags = make([]string, len(service.Tags))
   721  		copy(tags, service.Tags)
   722  	}
   723  
   724  	// newConnect returns (nil, nil) if there's no Connect-enabled service.
   725  	connect, err := newConnect(service.Name, service.Connect, workload.Networks)
   726  	if err != nil {
   727  		return nil, fmt.Errorf("invalid Consul Connect configuration for service %q: %v", service.Name, err)
   728  	}
   729  
   730  	// Determine whether to use meta or canary_meta
   731  	var meta map[string]string
   732  	if workload.Canary && len(service.CanaryMeta) > 0 {
   733  		meta = make(map[string]string, len(service.CanaryMeta)+1)
   734  		for k, v := range service.CanaryMeta {
   735  			meta[k] = v
   736  		}
   737  	} else {
   738  		meta = make(map[string]string, len(service.Meta)+1)
   739  		for k, v := range service.Meta {
   740  			meta[k] = v
   741  		}
   742  	}
   743  
   744  	// This enables the consul UI to show that Nomad registered this service
   745  	meta["external-source"] = "nomad"
   746  
   747  	// Build the Consul Service registration request
   748  	serviceReg := &api.AgentServiceRegistration{
   749  		ID:      id,
   750  		Name:    service.Name,
   751  		Tags:    tags,
   752  		Address: ip,
   753  		Port:    port,
   754  		Meta:    meta,
   755  		Connect: connect, // will be nil if no Connect stanza
   756  	}
   757  	ops.regServices = append(ops.regServices, serviceReg)
   758  
   759  	// Build the check registrations
   760  	checkIDs, err := c.checkRegs(ops, id, service, workload)
   761  	if err != nil {
   762  		return nil, err
   763  	}
   764  	for _, cid := range checkIDs {
   765  		sreg.checkIDs[cid] = struct{}{}
   766  	}
   767  	return sreg, nil
   768  }
   769  
   770  // checkRegs registers the checks for the given service and returns the
   771  // registered check ids.
   772  func (c *ServiceClient) checkRegs(ops *operations, serviceID string, service *structs.Service,
   773  	workload *WorkloadServices) ([]string, error) {
   774  
   775  	// Fast path
   776  	numChecks := len(service.Checks)
   777  	if numChecks == 0 {
   778  		return nil, nil
   779  	}
   780  
   781  	checkIDs := make([]string, 0, numChecks)
   782  	for _, check := range service.Checks {
   783  		checkID := MakeCheckID(serviceID, check)
   784  		checkIDs = append(checkIDs, checkID)
   785  		if check.Type == structs.ServiceCheckScript {
   786  			// Skip getAddress for script checks
   787  			checkReg, err := createCheckReg(serviceID, checkID, check, "", 0)
   788  			if err != nil {
   789  				return nil, fmt.Errorf("failed to add script check %q: %v", check.Name, err)
   790  			}
   791  			ops.regChecks = append(ops.regChecks, checkReg)
   792  			continue
   793  		}
   794  
   795  		// Default to the service's port but allow check to override
   796  		portLabel := check.PortLabel
   797  		if portLabel == "" {
   798  			// Default to the service's port label
   799  			portLabel = service.PortLabel
   800  		}
   801  
   802  		// Checks address mode defaults to host for pre-#3380 backward compat
   803  		addrMode := check.AddressMode
   804  		if addrMode == "" {
   805  			addrMode = structs.AddressModeHost
   806  		}
   807  
   808  		ip, port, err := getAddress(addrMode, portLabel, workload.Networks, workload.DriverNetwork)
   809  		if err != nil {
   810  			return nil, fmt.Errorf("error getting address for check %q: %v", check.Name, err)
   811  		}
   812  
   813  		checkReg, err := createCheckReg(serviceID, checkID, check, ip, port)
   814  		if err != nil {
   815  			return nil, fmt.Errorf("failed to add check %q: %v", check.Name, err)
   816  		}
   817  		ops.regChecks = append(ops.regChecks, checkReg)
   818  	}
   819  	return checkIDs, nil
   820  }
   821  
   822  // RegisterWorkload with Consul. Adds all service entries and checks to Consul.
   823  //
   824  // If the service IP is set it used as the address in the service registration.
   825  // Checks will always use the IP from the Task struct (host's IP).
   826  //
   827  // Actual communication with Consul is done asynchronously (see Run).
   828  func (c *ServiceClient) RegisterWorkload(workload *WorkloadServices) error {
   829  	// Fast path
   830  	numServices := len(workload.Services)
   831  	if numServices == 0 {
   832  		return nil
   833  	}
   834  
   835  	t := new(ServiceRegistrations)
   836  	t.Services = make(map[string]*ServiceRegistration, numServices)
   837  
   838  	ops := &operations{}
   839  	for _, service := range workload.Services {
   840  		sreg, err := c.serviceRegs(ops, service, workload)
   841  		if err != nil {
   842  			return err
   843  		}
   844  		t.Services[sreg.serviceID] = sreg
   845  	}
   846  
   847  	// Add the workload to the allocation's registration
   848  	c.addRegistrations(workload.AllocID, workload.Name(), t)
   849  
   850  	c.commit(ops)
   851  
   852  	// Start watching checks. Done after service registrations are built
   853  	// since an error building them could leak watches.
   854  	for _, service := range workload.Services {
   855  		serviceID := MakeAllocServiceID(workload.AllocID, workload.Name(), service)
   856  		for _, check := range service.Checks {
   857  			if check.TriggersRestarts() {
   858  				checkID := MakeCheckID(serviceID, check)
   859  				c.checkWatcher.Watch(workload.AllocID, workload.Name(), checkID, check, workload.Restarter)
   860  			}
   861  		}
   862  	}
   863  	return nil
   864  }
   865  
   866  // UpdateWorkload in Consul. Does not alter the service if only checks have
   867  // changed.
   868  //
   869  // DriverNetwork must not change between invocations for the same allocation.
   870  func (c *ServiceClient) UpdateWorkload(old, newWorkload *WorkloadServices) error {
   871  	ops := &operations{}
   872  
   873  	regs := new(ServiceRegistrations)
   874  	regs.Services = make(map[string]*ServiceRegistration, len(newWorkload.Services))
   875  
   876  	existingIDs := make(map[string]*structs.Service, len(old.Services))
   877  	for _, s := range old.Services {
   878  		existingIDs[MakeAllocServiceID(old.AllocID, old.Name(), s)] = s
   879  	}
   880  	newIDs := make(map[string]*structs.Service, len(newWorkload.Services))
   881  	for _, s := range newWorkload.Services {
   882  		newIDs[MakeAllocServiceID(newWorkload.AllocID, newWorkload.Name(), s)] = s
   883  	}
   884  
   885  	// Loop over existing Service IDs to see if they have been removed
   886  	for existingID, existingSvc := range existingIDs {
   887  		newSvc, ok := newIDs[existingID]
   888  
   889  		if !ok {
   890  			// Existing service entry removed
   891  			ops.deregServices = append(ops.deregServices, existingID)
   892  			for _, check := range existingSvc.Checks {
   893  				cid := MakeCheckID(existingID, check)
   894  				ops.deregChecks = append(ops.deregChecks, cid)
   895  
   896  				// Unwatch watched checks
   897  				if check.TriggersRestarts() {
   898  					c.checkWatcher.Unwatch(cid)
   899  				}
   900  			}
   901  			continue
   902  		}
   903  
   904  		oldHash := existingSvc.Hash(old.AllocID, old.Name(), old.Canary)
   905  		newHash := newSvc.Hash(newWorkload.AllocID, newWorkload.Name(), newWorkload.Canary)
   906  		if oldHash == newHash {
   907  			// Service exists and hasn't changed, don't re-add it later
   908  			delete(newIDs, existingID)
   909  		}
   910  
   911  		// Service still exists so add it to the task's registration
   912  		sreg := &ServiceRegistration{
   913  			serviceID: existingID,
   914  			checkIDs:  make(map[string]struct{}, len(newSvc.Checks)),
   915  		}
   916  		regs.Services[existingID] = sreg
   917  
   918  		// See if any checks were updated
   919  		existingChecks := make(map[string]*structs.ServiceCheck, len(existingSvc.Checks))
   920  		for _, check := range existingSvc.Checks {
   921  			existingChecks[MakeCheckID(existingID, check)] = check
   922  		}
   923  
   924  		// Register new checks
   925  		for _, check := range newSvc.Checks {
   926  			checkID := MakeCheckID(existingID, check)
   927  			if _, exists := existingChecks[checkID]; exists {
   928  				// Check is still required. Remove it from the map so it doesn't get
   929  				// deleted later.
   930  				delete(existingChecks, checkID)
   931  				sreg.checkIDs[checkID] = struct{}{}
   932  			}
   933  
   934  			// New check on an unchanged service; add them now
   935  			newCheckIDs, err := c.checkRegs(ops, existingID, newSvc, newWorkload)
   936  			if err != nil {
   937  				return err
   938  			}
   939  
   940  			for _, checkID := range newCheckIDs {
   941  				sreg.checkIDs[checkID] = struct{}{}
   942  			}
   943  
   944  			// Update all watched checks as CheckRestart fields aren't part of ID
   945  			if check.TriggersRestarts() {
   946  				c.checkWatcher.Watch(newWorkload.AllocID, newWorkload.Name(), checkID, check, newWorkload.Restarter)
   947  			}
   948  		}
   949  
   950  		// Remove existing checks not in updated service
   951  		for cid, check := range existingChecks {
   952  			ops.deregChecks = append(ops.deregChecks, cid)
   953  
   954  			// Unwatch checks
   955  			if check.TriggersRestarts() {
   956  				c.checkWatcher.Unwatch(cid)
   957  			}
   958  		}
   959  	}
   960  
   961  	// Any remaining services should just be enqueued directly
   962  	for _, newSvc := range newIDs {
   963  		sreg, err := c.serviceRegs(ops, newSvc, newWorkload)
   964  		if err != nil {
   965  			return err
   966  		}
   967  
   968  		regs.Services[sreg.serviceID] = sreg
   969  	}
   970  
   971  	// Add the task to the allocation's registration
   972  	c.addRegistrations(newWorkload.AllocID, newWorkload.Name(), regs)
   973  
   974  	c.commit(ops)
   975  
   976  	// Start watching checks. Done after service registrations are built
   977  	// since an error building them could leak watches.
   978  	for _, service := range newIDs {
   979  		serviceID := MakeAllocServiceID(newWorkload.AllocID, newWorkload.Name(), service)
   980  		for _, check := range service.Checks {
   981  			if check.TriggersRestarts() {
   982  				checkID := MakeCheckID(serviceID, check)
   983  				c.checkWatcher.Watch(newWorkload.AllocID, newWorkload.Name(), checkID, check, newWorkload.Restarter)
   984  			}
   985  		}
   986  	}
   987  	return nil
   988  }
   989  
   990  // RemoveWorkload from Consul. Removes all service entries and checks.
   991  //
   992  // Actual communication with Consul is done asynchronously (see Run).
   993  func (c *ServiceClient) RemoveWorkload(workload *WorkloadServices) {
   994  	ops := operations{}
   995  
   996  	for _, service := range workload.Services {
   997  		id := MakeAllocServiceID(workload.AllocID, workload.Name(), service)
   998  		ops.deregServices = append(ops.deregServices, id)
   999  
  1000  		for _, check := range service.Checks {
  1001  			cid := MakeCheckID(id, check)
  1002  			ops.deregChecks = append(ops.deregChecks, cid)
  1003  
  1004  			if check.TriggersRestarts() {
  1005  				c.checkWatcher.Unwatch(cid)
  1006  			}
  1007  		}
  1008  	}
  1009  
  1010  	// Remove the workload from the alloc's registrations
  1011  	c.removeRegistration(workload.AllocID, workload.Name())
  1012  
  1013  	// Now add them to the deregistration fields; main Run loop will update
  1014  	c.commit(&ops)
  1015  }
  1016  
  1017  // AllocRegistrations returns the registrations for the given allocation. If the
  1018  // allocation has no reservations, the response is a nil object.
  1019  func (c *ServiceClient) AllocRegistrations(allocID string) (*AllocRegistration, error) {
  1020  	// Get the internal struct using the lock
  1021  	c.allocRegistrationsLock.RLock()
  1022  	regInternal, ok := c.allocRegistrations[allocID]
  1023  	if !ok {
  1024  		c.allocRegistrationsLock.RUnlock()
  1025  		return nil, nil
  1026  	}
  1027  
  1028  	// Copy so we don't expose internal structs
  1029  	reg := regInternal.copy()
  1030  	c.allocRegistrationsLock.RUnlock()
  1031  
  1032  	// Query the services and checks to populate the allocation registrations.
  1033  	services, err := c.client.Services()
  1034  	if err != nil {
  1035  		return nil, err
  1036  	}
  1037  
  1038  	checks, err := c.client.Checks()
  1039  	if err != nil {
  1040  		return nil, err
  1041  	}
  1042  
  1043  	// Populate the object
  1044  	for _, treg := range reg.Tasks {
  1045  		for serviceID, sreg := range treg.Services {
  1046  			sreg.Service = services[serviceID]
  1047  			for checkID := range sreg.checkIDs {
  1048  				if check, ok := checks[checkID]; ok {
  1049  					sreg.Checks = append(sreg.Checks, check)
  1050  				}
  1051  			}
  1052  		}
  1053  	}
  1054  
  1055  	return reg, nil
  1056  }
  1057  
  1058  // UpdateTTL is used to update the TTL of a check. Typically this will only be
  1059  // called to heartbeat script checks.
  1060  func (c *ServiceClient) UpdateTTL(id, output, status string) error {
  1061  	return c.client.UpdateTTL(id, output, status)
  1062  }
  1063  
  1064  // Shutdown the Consul client. Update running task registrations and deregister
  1065  // agent from Consul. On first call blocks up to shutdownWait before giving up
  1066  // on syncing operations.
  1067  func (c *ServiceClient) Shutdown() error {
  1068  	// Serialize Shutdown calls with RegisterAgent to prevent leaking agent
  1069  	// entries.
  1070  	c.agentLock.Lock()
  1071  	defer c.agentLock.Unlock()
  1072  	select {
  1073  	case <-c.shutdownCh:
  1074  		return nil
  1075  	default:
  1076  		close(c.shutdownCh)
  1077  	}
  1078  
  1079  	// Give run loop time to sync, but don't block indefinitely
  1080  	deadline := time.After(c.shutdownWait)
  1081  
  1082  	// Wait for Run to finish any outstanding operations and exit
  1083  	select {
  1084  	case <-c.exitCh:
  1085  	case <-deadline:
  1086  		// Don't wait forever though
  1087  	}
  1088  
  1089  	// If Consul was never seen nothing could be written so exit early
  1090  	if !c.hasSeen() {
  1091  		return nil
  1092  	}
  1093  
  1094  	// Always attempt to deregister Nomad agent Consul entries, even if
  1095  	// deadline was reached
  1096  	for id := range c.agentServices {
  1097  		if err := c.client.ServiceDeregister(id); err != nil {
  1098  			c.logger.Error("failed deregistering agent service", "service_id", id, "error", err)
  1099  		}
  1100  	}
  1101  	for id := range c.agentChecks {
  1102  		if err := c.client.CheckDeregister(id); err != nil {
  1103  			c.logger.Error("failed deregistering agent check", "check_id", id, "error", err)
  1104  		}
  1105  	}
  1106  
  1107  	return nil
  1108  }
  1109  
  1110  // addRegistration adds the service registrations for the given allocation.
  1111  func (c *ServiceClient) addRegistrations(allocID, taskName string, reg *ServiceRegistrations) {
  1112  	c.allocRegistrationsLock.Lock()
  1113  	defer c.allocRegistrationsLock.Unlock()
  1114  
  1115  	alloc, ok := c.allocRegistrations[allocID]
  1116  	if !ok {
  1117  		alloc = &AllocRegistration{
  1118  			Tasks: make(map[string]*ServiceRegistrations),
  1119  		}
  1120  		c.allocRegistrations[allocID] = alloc
  1121  	}
  1122  	alloc.Tasks[taskName] = reg
  1123  }
  1124  
  1125  // removeRegistrations removes the registration for the given allocation.
  1126  func (c *ServiceClient) removeRegistration(allocID, taskName string) {
  1127  	c.allocRegistrationsLock.Lock()
  1128  	defer c.allocRegistrationsLock.Unlock()
  1129  
  1130  	alloc, ok := c.allocRegistrations[allocID]
  1131  	if !ok {
  1132  		return
  1133  	}
  1134  
  1135  	// Delete the task and if it is the last one also delete the alloc's
  1136  	// registration
  1137  	delete(alloc.Tasks, taskName)
  1138  	if len(alloc.Tasks) == 0 {
  1139  		delete(c.allocRegistrations, allocID)
  1140  	}
  1141  }
  1142  
  1143  // makeAgentServiceID creates a unique ID for identifying an agent service in
  1144  // Consul.
  1145  //
  1146  // Agent service IDs are of the form:
  1147  //
  1148  //	{nomadServicePrefix}-{ROLE}-b32(sha1({Service.Name}-{Service.Tags...})
  1149  //	Example Server ID: _nomad-server-fbbk265qn4tmt25nd4ep42tjvmyj3hr4
  1150  //	Example Client ID: _nomad-client-ggnjpgl7yn7rgmvxzilmpvrzzvrszc7l
  1151  //
  1152  func makeAgentServiceID(role string, service *structs.Service) string {
  1153  	return fmt.Sprintf("%s-%s-%s", nomadServicePrefix, role, service.Hash(role, "", false))
  1154  }
  1155  
  1156  // MakeAllocServiceID creates a unique ID for identifying an alloc service in
  1157  // Consul.
  1158  //
  1159  //	Example Service ID: _nomad-task-b4e61df9-b095-d64e-f241-23860da1375f-redis-http-http
  1160  func MakeAllocServiceID(allocID, taskName string, service *structs.Service) string {
  1161  	return fmt.Sprintf("%s%s-%s-%s-%s", nomadTaskPrefix, allocID, taskName, service.Name, service.PortLabel)
  1162  }
  1163  
  1164  // MakeCheckID creates a unique ID for a check.
  1165  //
  1166  //  Example Check ID: _nomad-check-434ae42f9a57c5705344974ac38de2aee0ee089d
  1167  func MakeCheckID(serviceID string, check *structs.ServiceCheck) string {
  1168  	return fmt.Sprintf("%s%s", nomadCheckPrefix, check.Hash(serviceID))
  1169  }
  1170  
  1171  // createCheckReg creates a Check that can be registered with Consul.
  1172  //
  1173  // Script checks simply have a TTL set and the caller is responsible for
  1174  // running the script and heartbeating.
  1175  func createCheckReg(serviceID, checkID string, check *structs.ServiceCheck, host string, port int) (*api.AgentCheckRegistration, error) {
  1176  	chkReg := api.AgentCheckRegistration{
  1177  		ID:        checkID,
  1178  		Name:      check.Name,
  1179  		ServiceID: serviceID,
  1180  	}
  1181  	chkReg.Status = check.InitialStatus
  1182  	chkReg.Timeout = check.Timeout.String()
  1183  	chkReg.Interval = check.Interval.String()
  1184  
  1185  	// Require an address for http or tcp checks
  1186  	if port == 0 && check.RequiresPort() {
  1187  		return nil, fmt.Errorf("%s checks require an address", check.Type)
  1188  	}
  1189  
  1190  	switch check.Type {
  1191  	case structs.ServiceCheckHTTP:
  1192  		proto := check.Protocol
  1193  		if proto == "" {
  1194  			proto = "http"
  1195  		}
  1196  		if check.TLSSkipVerify {
  1197  			chkReg.TLSSkipVerify = true
  1198  		}
  1199  		base := url.URL{
  1200  			Scheme: proto,
  1201  			Host:   net.JoinHostPort(host, strconv.Itoa(port)),
  1202  		}
  1203  		relative, err := url.Parse(check.Path)
  1204  		if err != nil {
  1205  			return nil, err
  1206  		}
  1207  		url := base.ResolveReference(relative)
  1208  		chkReg.HTTP = url.String()
  1209  		chkReg.Method = check.Method
  1210  		chkReg.Header = check.Header
  1211  
  1212  	case structs.ServiceCheckTCP:
  1213  		chkReg.TCP = net.JoinHostPort(host, strconv.Itoa(port))
  1214  
  1215  	case structs.ServiceCheckScript:
  1216  		chkReg.TTL = (check.Interval + ttlCheckBuffer).String()
  1217  		// As of Consul 1.0.0 setting TTL and Interval is a 400
  1218  		chkReg.Interval = ""
  1219  
  1220  	case structs.ServiceCheckGRPC:
  1221  		chkReg.GRPC = fmt.Sprintf("%s/%s", net.JoinHostPort(host, strconv.Itoa(port)), check.GRPCService)
  1222  		chkReg.GRPCUseTLS = check.GRPCUseTLS
  1223  		if check.TLSSkipVerify {
  1224  			chkReg.TLSSkipVerify = true
  1225  		}
  1226  
  1227  	default:
  1228  		return nil, fmt.Errorf("check type %+q not valid", check.Type)
  1229  	}
  1230  	return &chkReg, nil
  1231  }
  1232  
  1233  // isNomadCheck returns true if the ID matches the pattern of a Nomad managed
  1234  // check.
  1235  func isNomadCheck(id string) bool {
  1236  	return strings.HasPrefix(id, nomadCheckPrefix)
  1237  }
  1238  
  1239  // isNomadService returns true if the ID matches the pattern of a Nomad managed
  1240  // service (new or old formats). Agent services return false as independent
  1241  // client and server agents may be running on the same machine. #2827
  1242  func isNomadService(id string) bool {
  1243  	return strings.HasPrefix(id, nomadTaskPrefix) || isOldNomadService(id)
  1244  }
  1245  
  1246  // isOldNomadService returns true if the ID matches an old pattern managed by
  1247  // Nomad.
  1248  //
  1249  // Pre-0.7.1 task service IDs are of the form:
  1250  //
  1251  //	{nomadServicePrefix}-executor-{ALLOC_ID}-{Service.Name}-{Service.Tags...}
  1252  //	Example Service ID: _nomad-executor-1234-echo-http-tag1-tag2-tag3
  1253  //
  1254  func isOldNomadService(id string) bool {
  1255  	const prefix = nomadServicePrefix + "-executor"
  1256  	return strings.HasPrefix(id, prefix)
  1257  }
  1258  
  1259  // isNomadSidecar returns true if the ID matches a sidecar proxy for a Nomad
  1260  // managed service.
  1261  //
  1262  // For example if you have a Connect enabled service with the ID:
  1263  //
  1264  //	_nomad-task-5229c7f8-376b-3ccc-edd9-981e238f7033-cache-redis-cache-db
  1265  //
  1266  // Consul will create a service for the sidecar proxy with the ID:
  1267  //
  1268  //	_nomad-task-5229c7f8-376b-3ccc-edd9-981e238f7033-cache-redis-cache-db-sidecar-proxy
  1269  //
  1270  func isNomadSidecar(id string, services map[string]*api.AgentServiceRegistration) bool {
  1271  	const suffix = "-sidecar-proxy"
  1272  	if !strings.HasSuffix(id, suffix) {
  1273  		return false
  1274  	}
  1275  
  1276  	// Make sure the Nomad managed service for this proxy still exists.
  1277  	_, ok := services[id[:len(id)-len(suffix)]]
  1278  	return ok
  1279  }
  1280  
  1281  // getAddress returns the IP and port to use for a service or check. If no port
  1282  // label is specified (an empty value), zero values are returned because no
  1283  // address could be resolved.
  1284  func getAddress(addrMode, portLabel string, networks structs.Networks, driverNet *drivers.DriverNetwork) (string, int, error) {
  1285  	switch addrMode {
  1286  	case structs.AddressModeAuto:
  1287  		if driverNet.Advertise() {
  1288  			addrMode = structs.AddressModeDriver
  1289  		} else {
  1290  			addrMode = structs.AddressModeHost
  1291  		}
  1292  		return getAddress(addrMode, portLabel, networks, driverNet)
  1293  	case structs.AddressModeHost:
  1294  		if portLabel == "" {
  1295  			if len(networks) != 1 {
  1296  				// If no networks are specified return zero
  1297  				// values. Consul will advertise the host IP
  1298  				// with no port. This is the pre-0.7.1 behavior
  1299  				// some people rely on.
  1300  				return "", 0, nil
  1301  			}
  1302  
  1303  			return networks[0].IP, 0, nil
  1304  		}
  1305  
  1306  		// Default path: use host ip:port
  1307  		ip, port := networks.Port(portLabel)
  1308  		if ip == "" && port <= 0 {
  1309  			return "", 0, fmt.Errorf("invalid port %q: port label not found", portLabel)
  1310  		}
  1311  		return ip, port, nil
  1312  
  1313  	case structs.AddressModeDriver:
  1314  		// Require a driver network if driver address mode is used
  1315  		if driverNet == nil {
  1316  			return "", 0, fmt.Errorf(`cannot use address_mode="driver": no driver network exists`)
  1317  		}
  1318  
  1319  		// If no port label is specified just return the IP
  1320  		if portLabel == "" {
  1321  			return driverNet.IP, 0, nil
  1322  		}
  1323  
  1324  		// If the port is a label, use the driver's port (not the host's)
  1325  		if port, ok := driverNet.PortMap[portLabel]; ok {
  1326  			return driverNet.IP, port, nil
  1327  		}
  1328  
  1329  		// If port isn't a label, try to parse it as a literal port number
  1330  		port, err := strconv.Atoi(portLabel)
  1331  		if err != nil {
  1332  			// Don't include Atoi error message as user likely
  1333  			// never intended it to be a numeric and it creates a
  1334  			// confusing error message
  1335  			return "", 0, fmt.Errorf("invalid port label %q: port labels in driver address_mode must be numeric or in the driver's port map", portLabel)
  1336  		}
  1337  		if port <= 0 {
  1338  			return "", 0, fmt.Errorf("invalid port: %q: port must be >0", portLabel)
  1339  		}
  1340  
  1341  		return driverNet.IP, port, nil
  1342  
  1343  	default:
  1344  		// Shouldn't happen due to validation, but enforce invariants
  1345  		return "", 0, fmt.Errorf("invalid address mode %q", addrMode)
  1346  	}
  1347  }
  1348  
  1349  // newConnect creates a new Consul AgentServiceConnect struct based on a Nomad
  1350  // Connect struct. If the nomad Connect struct is nil, nil will be returned to
  1351  // disable Connect for this service.
  1352  func newConnect(serviceName string, nc *structs.ConsulConnect, networks structs.Networks) (*api.AgentServiceConnect, error) {
  1353  	if nc == nil {
  1354  		// No Connect stanza, returning nil is fine
  1355  		return nil, nil
  1356  	}
  1357  
  1358  	cc := &api.AgentServiceConnect{
  1359  		Native: nc.Native,
  1360  	}
  1361  
  1362  	if nc.SidecarService == nil {
  1363  		return cc, nil
  1364  	}
  1365  
  1366  	net, port, err := getConnectPort(serviceName, networks)
  1367  	if err != nil {
  1368  		return nil, err
  1369  	}
  1370  
  1371  	// Bind to netns IP(s):port
  1372  	proxyConfig := map[string]interface{}{}
  1373  	localServiceAddress := ""
  1374  	localServicePort := 0
  1375  	if nc.SidecarService.Proxy != nil {
  1376  		localServiceAddress = nc.SidecarService.Proxy.LocalServiceAddress
  1377  		localServicePort = nc.SidecarService.Proxy.LocalServicePort
  1378  		if nc.SidecarService.Proxy.Config != nil {
  1379  			proxyConfig = nc.SidecarService.Proxy.Config
  1380  		}
  1381  	}
  1382  	proxyConfig["bind_address"] = "0.0.0.0"
  1383  	proxyConfig["bind_port"] = port.To
  1384  
  1385  	// Advertise host IP:port
  1386  	cc.SidecarService = &api.AgentServiceRegistration{
  1387  		Tags:    helper.CopySliceString(nc.SidecarService.Tags),
  1388  		Address: net.IP,
  1389  		Port:    port.Value,
  1390  
  1391  		// Automatically configure the proxy to bind to all addresses
  1392  		// within the netns.
  1393  		Proxy: &api.AgentServiceConnectProxyConfig{
  1394  			LocalServiceAddress: localServiceAddress,
  1395  			LocalServicePort:    localServicePort,
  1396  			Config:              proxyConfig,
  1397  		},
  1398  	}
  1399  
  1400  	// If no further proxy settings were explicitly configured, exit early
  1401  	if nc.SidecarService.Proxy == nil {
  1402  		return cc, nil
  1403  	}
  1404  
  1405  	numUpstreams := len(nc.SidecarService.Proxy.Upstreams)
  1406  	if numUpstreams == 0 {
  1407  		return cc, nil
  1408  	}
  1409  
  1410  	upstreams := make([]api.Upstream, numUpstreams)
  1411  	for i, nu := range nc.SidecarService.Proxy.Upstreams {
  1412  		upstreams[i].DestinationName = nu.DestinationName
  1413  		upstreams[i].LocalBindPort = nu.LocalBindPort
  1414  	}
  1415  	cc.SidecarService.Proxy.Upstreams = upstreams
  1416  
  1417  	return cc, nil
  1418  }
  1419  
  1420  // getConnectPort returns the network and port for the Connect proxy sidecar
  1421  // defined for this service. An error is returned if the network and port
  1422  // cannot be determined.
  1423  func getConnectPort(serviceName string, networks structs.Networks) (*structs.NetworkResource, structs.Port, error) {
  1424  	if n := len(networks); n != 1 {
  1425  		return nil, structs.Port{}, fmt.Errorf("Connect only supported with exactly 1 network (found %d)", n)
  1426  	}
  1427  
  1428  	port, ok := networks[0].PortForService(serviceName)
  1429  	if !ok {
  1430  		return nil, structs.Port{}, fmt.Errorf("No Connect port defined for service %q", serviceName)
  1431  	}
  1432  
  1433  	return networks[0], port, nil
  1434  }