github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/command/agent/consul/service_client.go (about)

     1  package consul
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"net"
     7  	"net/url"
     8  	"reflect"
     9  	"regexp"
    10  	"strconv"
    11  	"strings"
    12  	"sync"
    13  	"sync/atomic"
    14  	"time"
    15  
    16  	"github.com/armon/go-metrics"
    17  	"github.com/hashicorp/consul/api"
    18  	"github.com/hashicorp/go-hclog"
    19  	"github.com/hashicorp/go-set"
    20  	"github.com/hashicorp/nomad/client/serviceregistration"
    21  	"github.com/hashicorp/nomad/helper"
    22  	"github.com/hashicorp/nomad/helper/envoy"
    23  	"github.com/hashicorp/nomad/nomad/structs"
    24  	"golang.org/x/exp/maps"
    25  	"golang.org/x/exp/slices"
    26  )
    27  
const (
	// nomadServicePrefix is the prefix that scopes all Nomad registered
	// services (both agent and task entries).
	nomadServicePrefix = "_nomad"

	// nomadServerPrefix is the prefix that scopes Nomad registered Servers.
	nomadServerPrefix = nomadServicePrefix + "-server-"

	// nomadClientPrefix is the prefix that scopes Nomad registered Clients.
	nomadClientPrefix = nomadServicePrefix + "-client-"

	// nomadTaskPrefix is the prefix that scopes Nomad registered services
	// for tasks.
	nomadTaskPrefix = nomadServicePrefix + "-task-"

	// nomadCheckPrefix is the prefix that scopes Nomad registered checks for
	// services.
	nomadCheckPrefix = nomadServicePrefix + "-check-"

	// defaultRetryInterval is how quickly to retry syncing services and
	// checks to Consul when an error occurs. The retry delay backs off up to
	// a maximum (see defaultMaxRetryInterval).
	defaultRetryInterval = time.Second

	// defaultMaxRetryInterval is the default max retry interval.
	defaultMaxRetryInterval = 30 * time.Second

	// defaultPeriodicInterval is the interval at which the service
	// client reconciles state between the desired services and checks and
	// what's actually registered in Consul. This is done at an interval,
	// rather than being purely edge triggered, to handle the case that the
	// Consul agent's state may change underneath us.
	defaultPeriodicInterval = 30 * time.Second

	// ttlCheckBuffer is the time interval that Nomad can take to report the
	// check result to Consul.
	ttlCheckBuffer = 31 * time.Second

	// defaultShutdownWait is how long Shutdown() should block waiting for
	// enqueued operations to sync to Consul by default.
	defaultShutdownWait = time.Minute

	// DefaultQueryWaitDuration is the max duration the Consul Agent will
	// spend waiting for a response from a Consul Query.
	DefaultQueryWaitDuration = 2 * time.Second

	// ServiceTagHTTP is the tag assigned to HTTP services.
	ServiceTagHTTP = "http"

	// ServiceTagRPC is the tag assigned to RPC services.
	ServiceTagRPC = "rpc"

	// ServiceTagSerf is the tag assigned to Serf services.
	ServiceTagSerf = "serf"

	// deregisterProbationPeriod is the initialization period during which
	// services registered in Consul but not known to Nomad don't get
	// deregistered, to allow for Nomad restoring tasks.
	deregisterProbationPeriod = time.Minute
)
    87  
    88  // Additional Consul ACLs required
    89  // - Consul Template: key:read
    90  //   Used in tasks with template stanza that use Consul keys.
    91  
// CatalogAPI is the consul/api.Catalog API used by Nomad. Declaring it as an
// interface lets any value with these two methods stand in for the Consul
// client.
//
// ACL requirements
// - node:read (listing datacenters)
// - service:read
type CatalogAPI interface {
	// Datacenters lists the datacenters (requires node:read).
	Datacenters() ([]string, error)

	// Service returns the catalog entries for the given service name and tag,
	// along with the query metadata.
	Service(service, tag string, q *api.QueryOptions) ([]*api.CatalogService, *api.QueryMeta, error)
}
   101  
// NamespaceAPI is the consul/api.Namespace API used by Nomad.
//
// ACL requirements
// - operator:read OR namespace:*:read
type NamespaceAPI interface {
	// List returns the namespaces together with the query metadata.
	List(q *api.QueryOptions) ([]*api.Namespace, *api.QueryMeta, error)
}
   109  
// AgentAPI is the consul/api.Agent API used by Nomad. The methods are grouped
// below into check operations, service operations, and agent introspection.
//
// ACL requirements
// - agent:read
// - service:write
type AgentAPI interface {
	// Check operations.
	CheckRegister(check *api.AgentCheckRegistration) error
	CheckDeregisterOpts(checkID string, q *api.QueryOptions) error
	ChecksWithFilterOpts(filter string, q *api.QueryOptions) (map[string]*api.AgentCheck, error)
	UpdateTTLOpts(id, output, status string, q *api.QueryOptions) error

	// Service operations.
	ServiceRegister(service *api.AgentServiceRegistration) error
	ServiceDeregisterOpts(serviceID string, q *api.QueryOptions) error
	ServicesWithFilterOpts(filter string, q *api.QueryOptions) (map[string]*api.AgentService, error)

	// Self returns information about the Consul agent itself as a nested map.
	Self() (map[string]map[string]interface{}, error)
}
   127  
// ConfigAPI is the consul/api.ConfigEntries API subset used by Nomad Server.
//
// ACL requirements
// - operator:write (server only)
type ConfigAPI interface {
	// Set writes the given configuration entry.
	Set(entry api.ConfigEntry, w *api.WriteOptions) (bool, *api.WriteMeta, error)
	// Delete(kind, name string, w *api.WriteOptions) (*api.WriteMeta, error) (not used)
}
   136  
// ACLsAPI is the consul/api.ACL API subset used by Nomad Server. It covers
// reading tokens, policies and roles, and creating, deleting and listing
// tokens.
//
// ACL requirements
// - acl:write (server only)
type ACLsAPI interface {
	TokenReadSelf(q *api.QueryOptions) (*api.ACLToken, *api.QueryMeta, error) // for lookup via operator token
	PolicyRead(policyID string, q *api.QueryOptions) (*api.ACLPolicy, *api.QueryMeta, error)
	RoleRead(roleID string, q *api.QueryOptions) (*api.ACLRole, *api.QueryMeta, error)
	TokenCreate(partial *api.ACLToken, q *api.WriteOptions) (*api.ACLToken, *api.WriteMeta, error)
	TokenDelete(accessorID string, q *api.WriteOptions) (*api.WriteMeta, error)
	TokenList(q *api.QueryOptions) ([]*api.ACLTokenListEntry, *api.QueryMeta, error)
}
   149  
   150  // agentServiceUpdateRequired checks if any critical fields in Nomad's version
   151  // of a service definition are different from the existing service definition as
   152  // known by Consul.
   153  //
   154  //	reason - The syncReason that triggered this synchronization with the consul
   155  //	         agent API.
   156  //	wanted - Nomad's view of what the service definition is intended to be.
   157  //	         Not nil.
   158  //	existing - Consul's view (agent, not catalog) of the actual service definition.
   159  //	         Not nil.
   160  //	sidecar - Consul's view (agent, not catalog) of the service definition of the sidecar
   161  //	         associated with existing that may or may not exist.
   162  //	         May be nil.
   163  func (s *ServiceClient) agentServiceUpdateRequired(reason syncReason, wanted *api.AgentServiceRegistration, existing *api.AgentService, sidecar *api.AgentService) bool {
   164  	switch reason {
   165  	case syncPeriodic:
   166  		// In a periodic sync with Consul, we need to respect the value of
   167  		// the enable_tag_override field so that we maintain the illusion that the
   168  		// user is in control of the Consul tags, as they may be externally edited
   169  		// via the Consul catalog API (e.g. a user manually sets them).
   170  		//
   171  		// As Consul does by disabling anti-entropy for the tags field, Nomad will
   172  		// ignore differences in the tags field during the periodic syncs with
   173  		// the Consul agent API.
   174  		//
   175  		// We do so by over-writing the nomad service registration by the value
   176  		// of the tags that Consul contains, if enable_tag_override = true.
   177  		maybeTweakTags(wanted, existing, sidecar)
   178  
   179  		// Also, purge tagged address fields of nomad agent services.
   180  		maybeTweakTaggedAddresses(wanted, existing)
   181  
   182  		// Okay now it is safe to compare.
   183  		return s.different(wanted, existing, sidecar)
   184  
   185  	default:
   186  		// A non-periodic sync with Consul indicates an operation has been set
   187  		// on the queue. This happens when service has been added / removed / modified
   188  		// and implies the Consul agent should be sync'd with nomad, because
   189  		// nomad is the ultimate source of truth for the service definition.
   190  
   191  		// But do purge tagged address fields of nomad agent services.
   192  		maybeTweakTaggedAddresses(wanted, existing)
   193  
   194  		// Okay now it is safe to compare.
   195  		return s.different(wanted, existing, sidecar)
   196  	}
   197  }
   198  
   199  // maybeTweakTags will override wanted.Tags with a copy of existing.Tags only if
   200  // EnableTagOverride is true. Otherwise the wanted service registration is left
   201  // unchanged.
   202  func maybeTweakTags(wanted *api.AgentServiceRegistration, existing *api.AgentService, sidecar *api.AgentService) {
   203  	if wanted.EnableTagOverride {
   204  		wanted.Tags = slices.Clone(existing.Tags)
   205  		// If the service registration also defines a sidecar service, use the ETO
   206  		// setting for the parent service to also apply to the sidecar.
   207  		if wanted.Connect != nil && wanted.Connect.SidecarService != nil {
   208  			if sidecar != nil {
   209  				wanted.Connect.SidecarService.Tags = slices.Clone(sidecar.Tags)
   210  			}
   211  		}
   212  	}
   213  }
   214  
   215  // maybeTweakTaggedAddresses will remove the Consul-injected .TaggedAddresses fields
   216  // from existing if wanted represents a Nomad agent (Client or Server) or Nomad managed
   217  // service, which do not themselves configure those tagged addresses. We do this
   218  // because Consul will magically set the .TaggedAddress to values Nomad does not
   219  // know about if they are submitted as unset.
   220  func maybeTweakTaggedAddresses(wanted *api.AgentServiceRegistration, existing *api.AgentService) {
   221  	if isNomadAgent(wanted.ID) || isNomadService(wanted.ID) {
   222  		if _, exists := wanted.TaggedAddresses["lan_ipv4"]; !exists {
   223  			delete(existing.TaggedAddresses, "lan_ipv4")
   224  		}
   225  		if _, exists := wanted.TaggedAddresses["wan_ipv4"]; !exists {
   226  			delete(existing.TaggedAddresses, "wan_ipv4")
   227  		}
   228  		if _, exists := wanted.TaggedAddresses["lan_ipv6"]; !exists {
   229  			delete(existing.TaggedAddresses, "lan_ipv6")
   230  		}
   231  		if _, exists := wanted.TaggedAddresses["wan_ipv6"]; !exists {
   232  			delete(existing.TaggedAddresses, "wan_ipv6")
   233  		}
   234  	}
   235  }
   236  
   237  // different compares the wanted state of the service registration with the actual
   238  // (cached) state of the service registration reported by Consul. If any of the
   239  // critical fields are not deeply equal, they considered different.
   240  func (s *ServiceClient) different(wanted *api.AgentServiceRegistration, existing *api.AgentService, sidecar *api.AgentService) bool {
   241  	trace := func(field string, left, right any) {
   242  		s.logger.Trace("registrations different", "id", wanted.ID,
   243  			"field", field, "wanted", fmt.Sprintf("%#v", left), "existing", fmt.Sprintf("%#v", right),
   244  		)
   245  	}
   246  
   247  	switch {
   248  	case wanted.Kind != existing.Kind:
   249  		trace("kind", wanted.Kind, existing.Kind)
   250  		return true
   251  	case wanted.ID != existing.ID:
   252  		trace("id", wanted.ID, existing.ID)
   253  		return true
   254  	case wanted.Port != existing.Port:
   255  		trace("port", wanted.Port, existing.Port)
   256  		return true
   257  	case wanted.Address != existing.Address:
   258  		trace("address", wanted.Address, existing.Address)
   259  		return true
   260  	case wanted.Name != existing.Service:
   261  		trace("service name", wanted.Name, existing.Service)
   262  		return true
   263  	case wanted.EnableTagOverride != existing.EnableTagOverride:
   264  		trace("enable_tag_override", wanted.EnableTagOverride, existing.EnableTagOverride)
   265  		return true
   266  	case !maps.Equal(wanted.Meta, existing.Meta):
   267  		trace("meta", wanted.Meta, existing.Meta)
   268  		return true
   269  	case !maps.Equal(wanted.TaggedAddresses, existing.TaggedAddresses):
   270  		trace("tagged_addresses", wanted.TaggedAddresses, existing.TaggedAddresses)
   271  		return true
   272  	case !helper.SliceSetEq(wanted.Tags, existing.Tags):
   273  		trace("tags", wanted.Tags, existing.Tags)
   274  		return true
   275  	case connectSidecarDifferent(wanted, sidecar):
   276  		trace("connect_sidecar", wanted.Name, existing.Service)
   277  		return true
   278  	}
   279  	return false
   280  }
   281  
   282  // sidecarTagsDifferent includes the special logic for comparing sidecar tags
   283  // from Nomad vs. Consul perspective. Because Consul forces the sidecar tags
   284  // to inherit the parent service tags if the sidecar tags are unset, we need to
   285  // take that into consideration when Nomad's sidecar tags are unset by instead
   286  // comparing them to the parent service tags.
   287  func sidecarTagsDifferent(parent, wanted, sidecar []string) bool {
   288  	if len(wanted) == 0 {
   289  		return !helper.SliceSetEq(parent, sidecar)
   290  	}
   291  	return !helper.SliceSetEq(wanted, sidecar)
   292  }
   293  
   294  // proxyUpstreamsDifferent determines if the sidecar_service.proxy.upstreams
   295  // configurations are different between the desired sidecar service state, and
   296  // the actual sidecar service state currently registered in Consul.
   297  func proxyUpstreamsDifferent(wanted *api.AgentServiceConnect, sidecar *api.AgentServiceConnectProxyConfig) bool {
   298  	// There is similar code that already does this in Nomad's API package,
   299  	// however here we are operating on Consul API package structs, and they do not
   300  	// provide such helper functions.
   301  
   302  	getProxyUpstreams := func(pc *api.AgentServiceConnectProxyConfig) []api.Upstream {
   303  		switch {
   304  		case pc == nil:
   305  			return nil
   306  		case len(pc.Upstreams) == 0:
   307  			return nil
   308  		default:
   309  			return pc.Upstreams
   310  		}
   311  	}
   312  
   313  	getConnectUpstreams := func(sc *api.AgentServiceConnect) []api.Upstream {
   314  		switch {
   315  		case sc.SidecarService.Proxy == nil:
   316  			return nil
   317  		case len(sc.SidecarService.Proxy.Upstreams) == 0:
   318  			return nil
   319  		default:
   320  			return sc.SidecarService.Proxy.Upstreams
   321  		}
   322  	}
   323  
   324  	upstreamsDifferent := func(a, b []api.Upstream) bool {
   325  		if len(a) != len(b) {
   326  			return true
   327  		}
   328  
   329  		for i := 0; i < len(a); i++ {
   330  			A := a[i]
   331  			B := b[i]
   332  			switch {
   333  			case A.Datacenter != B.Datacenter:
   334  				return true
   335  			case A.DestinationName != B.DestinationName:
   336  				return true
   337  			case A.LocalBindAddress != B.LocalBindAddress:
   338  				return true
   339  			case A.LocalBindPort != B.LocalBindPort:
   340  				return true
   341  			case A.MeshGateway.Mode != B.MeshGateway.Mode:
   342  				return true
   343  			case !reflect.DeepEqual(A.Config, B.Config):
   344  				return true
   345  			}
   346  		}
   347  		return false
   348  	}
   349  
   350  	return upstreamsDifferent(
   351  		getConnectUpstreams(wanted),
   352  		getProxyUpstreams(sidecar),
   353  	)
   354  }
   355  
   356  // connectSidecarDifferent returns true if Nomad expects there to be a sidecar
   357  // hanging off the desired parent service definition on the Consul side, and does
   358  // not match with what Consul has.
   359  //
   360  // This is used to determine if the connect sidecar service registration should be
   361  // updated - potentially (but not necessarily) in-place.
   362  func connectSidecarDifferent(wanted *api.AgentServiceRegistration, sidecar *api.AgentService) bool {
   363  	if wanted.Connect != nil && wanted.Connect.SidecarService != nil {
   364  		if sidecar == nil {
   365  			// consul lost our sidecar (?)
   366  			return true
   367  		}
   368  
   369  		if sidecarTagsDifferent(wanted.Tags, wanted.Connect.SidecarService.Tags, sidecar.Tags) {
   370  			// tags on the nomad definition have been modified
   371  			return true
   372  		}
   373  
   374  		if proxyUpstreamsDifferent(wanted.Connect, sidecar.Proxy) {
   375  			// proxy upstreams on the nomad definition have been modified
   376  			return true
   377  		}
   378  	}
   379  
   380  	// Either Nomad does not expect there to be a sidecar_service, or there is
   381  	// no actionable difference from the Consul sidecar_service definition.
   382  	return false
   383  }
   384  
// operations are submitted to the main loop via commit() for synchronizing
// with Consul. Each field is a batch of registrations to add or IDs to remove;
// merge() folds them into the ServiceClient's desired state.
type operations struct {
	regServices   []*api.AgentServiceRegistration // services to register, keyed by their ID on merge
	regChecks     []*api.AgentCheckRegistration   // checks to register, keyed by their ID on merge
	deregServices []string                        // IDs of services to deregister
	deregChecks   []string                        // IDs of checks to deregister
}
   393  
   394  func (o *operations) empty() bool {
   395  	switch {
   396  	case o == nil:
   397  		return true
   398  	case len(o.regServices) > 0:
   399  		return false
   400  	case len(o.regChecks) > 0:
   401  		return false
   402  	case len(o.deregServices) > 0:
   403  		return false
   404  	case len(o.deregChecks) > 0:
   405  		return false
   406  	default:
   407  		return true
   408  	}
   409  }
   410  
   411  func (o *operations) String() string {
   412  	return fmt.Sprintf("<%d, %d, %d, %d>", len(o.regServices), len(o.regChecks), len(o.deregServices), len(o.deregChecks))
   413  }
   414  
// ServiceClient handles task and agent service registration with Consul.
type ServiceClient struct {
	// Handles on the Consul agent API and the Consul namespaces lister.
	agentAPI         AgentAPI
	namespacesClient *NamespacesClient

	// logger plus the timing knobs of the Run loop: the base and maximum
	// delay between retries after a failed sync, and the interval between
	// periodic reconciliations after a successful one.
	logger           hclog.Logger
	retryInterval    time.Duration
	maxRetryInterval time.Duration
	periodicInterval time.Duration

	// exitCh is closed when the main Run loop exits
	exitCh chan struct{}

	// shutdownCh is closed when the client should shutdown
	shutdownCh chan struct{}

	// shutdownWait is how long Shutdown() blocks waiting for the final
	// sync() to finish. Defaults to defaultShutdownWait
	shutdownWait time.Duration

	// opCh carries operations from commit() to the Run loop.
	opCh chan *operations

	// services and checks hold the desired registrations keyed by ID; they
	// are updated by merge().
	services map[string]*api.AgentServiceRegistration
	checks   map[string]*api.AgentCheckRegistration

	// explicitlyDeregistered* record the IDs removed through merge(); they
	// are reset by clearExplicitlyDeregistered() after a successful sync.
	explicitlyDeregisteredServices *set.Set[string]
	explicitlyDeregisteredChecks   *set.Set[string]

	// allocRegistrations stores the services and checks that are registered
	// with Consul by allocation ID.
	allocRegistrations     map[string]*serviceregistration.AllocRegistration
	allocRegistrationsLock sync.RWMutex

	// Nomad agent services and checks that are recorded so they can be removed
	// on shutdown. Defers to consul namespace specified in client consul config.
	agentServices *set.Set[string]
	agentChecks   *set.Set[string]
	agentLock     sync.Mutex

	// seen is 1 if Consul has ever been seen; otherwise 0. Accessed with
	// atomics.
	seen int32

	// deregisterProbationExpiry is the time before which consul sync shouldn't deregister
	// unknown services.
	// Used to mitigate risk of deleting restored services upon client restart.
	deregisterProbationExpiry time.Time

	// checkWatcher restarts checks that are unhealthy.
	checkWatcher *serviceregistration.UniversalCheckWatcher

	// isClientAgent specifies whether this Consul client is being used
	// by a Nomad client.
	isClientAgent bool
}
   470  
// checkStatusGetter is the consul-specific implementation of serviceregistration.CheckStatusGetter
type checkStatusGetter struct {
	agentAPI         AgentAPI          // used to query the checks in each namespace
	namespacesClient *NamespacesClient // used to list the namespaces to query
}
   476  
   477  func (csg *checkStatusGetter) Get() (map[string]string, error) {
   478  	// Get the list of all namespaces so we can iterate them.
   479  	namespaces, err := csg.namespacesClient.List()
   480  	if err != nil {
   481  		return nil, err
   482  	}
   483  
   484  	results := make(map[string]string)
   485  	for _, namespace := range namespaces {
   486  		resultsInNamespace, err := csg.agentAPI.ChecksWithFilterOpts("", &api.QueryOptions{Namespace: normalizeNamespace(namespace)})
   487  		if err != nil {
   488  			return nil, err
   489  		}
   490  
   491  		for k, v := range resultsInNamespace {
   492  			results[k] = v.Status
   493  		}
   494  	}
   495  	return results, nil
   496  }
   497  
   498  // NewServiceClient creates a new Consul ServiceClient from an existing Consul API
   499  // Client, logger and takes whether the client is being used by a Nomad Client agent.
   500  // When being used by a Nomad client, this Consul client reconciles all services and
   501  // checks created by Nomad on behalf of running tasks.
   502  func NewServiceClient(agentAPI AgentAPI, namespacesClient *NamespacesClient, logger hclog.Logger, isNomadClient bool) *ServiceClient {
   503  	logger = logger.ResetNamed("consul.sync")
   504  	return &ServiceClient{
   505  		agentAPI:                       agentAPI,
   506  		namespacesClient:               namespacesClient,
   507  		logger:                         logger,
   508  		retryInterval:                  defaultRetryInterval,
   509  		maxRetryInterval:               defaultMaxRetryInterval,
   510  		periodicInterval:               defaultPeriodicInterval,
   511  		exitCh:                         make(chan struct{}),
   512  		shutdownCh:                     make(chan struct{}),
   513  		shutdownWait:                   defaultShutdownWait,
   514  		opCh:                           make(chan *operations, 8),
   515  		services:                       make(map[string]*api.AgentServiceRegistration),
   516  		checks:                         make(map[string]*api.AgentCheckRegistration),
   517  		explicitlyDeregisteredServices: set.New[string](0),
   518  		explicitlyDeregisteredChecks:   set.New[string](0),
   519  		allocRegistrations:             make(map[string]*serviceregistration.AllocRegistration),
   520  		agentServices:                  set.New[string](4),
   521  		agentChecks:                    set.New[string](0),
   522  		isClientAgent:                  isNomadClient,
   523  		deregisterProbationExpiry:      time.Now().Add(deregisterProbationPeriod),
   524  		checkWatcher: serviceregistration.NewCheckWatcher(logger, &checkStatusGetter{
   525  			agentAPI:         agentAPI,
   526  			namespacesClient: namespacesClient,
   527  		}),
   528  	}
   529  }
   530  
// seen is the value stored in ServiceClient.seen by markSeen, and compared
// against by hasSeen, once Consul has been contacted.
const seen = 1
   533  
// markSeen marks Consul as having been seen (meaning at least one operation
// has succeeded). The flag is written atomically so that hasSeen may be
// called from other goroutines.
func (c *ServiceClient) markSeen() {
	atomic.StoreInt32(&c.seen, seen)
}
   539  
   540  // hasSeen returns true if any Consul operation has ever succeeded. Useful to
   541  // squelch errors if Consul isn't running.
   542  func (c *ServiceClient) hasSeen() bool {
   543  	return atomic.LoadInt32(&c.seen) == seen
   544  }
   545  
   546  // syncReason indicates why a sync operation with consul is about to happen.
   547  //
   548  // The trigger for a sync may have implications on the behavior of the sync itself.
   549  // In particular if a service is defined with enable_tag_override=true, the sync
   550  // should ignore changes to the service's Tags field.
   551  type syncReason byte
   552  
   553  const (
   554  	syncPeriodic syncReason = iota
   555  	syncShutdown
   556  	syncNewOps
   557  )
   558  
   559  func (sr syncReason) String() string {
   560  	switch sr {
   561  	case syncPeriodic:
   562  		return "periodic"
   563  	case syncShutdown:
   564  		return "shutdown"
   565  	case syncNewOps:
   566  		return "operations"
   567  	default:
   568  		return "unexpected"
   569  	}
   570  }
   571  
// Run the Consul main loop which retries operations against Consul. It should
// be called exactly once.
//
// The loop has two phases. First it waits for initial contact with Consul,
// merging (but not syncing) any operations that arrive in the meantime. Then
// it syncs whenever the timer fires, new operations arrive, or shutdown is
// signaled. exitCh is closed when Run returns.
func (c *ServiceClient) Run() {
	defer close(c.exitCh)

	// ctx is handed to the background goroutines started below; it is
	// canceled on shutdown and when Run returns.
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// init will be closed when Consul has been contacted
	init := make(chan struct{})
	go checkConsulTLSSkipVerify(ctx, c.logger, c.agentAPI, init)

	// Process operations while waiting for initial contact with Consul but
	// do not sync until contact has been made. A shutdown during this phase
	// returns without ever syncing.
INIT:
	for {
		select {
		case <-init:
			c.markSeen()
			break INIT
		case <-c.shutdownCh:
			return
		case ops := <-c.opCh:
			c.merge(ops)
		}
	}
	c.logger.Trace("able to contact Consul")

	// Contact with Consul has been established, so it is now safe to start
	// the checkWatcher.
	go c.checkWatcher.Run(ctx)

	// Always immediately sync to reconcile Nomad and Consul's state
	retryTimer := time.NewTimer(0)

	// failures counts consecutive failed syncs; it drives both the log
	// throttling and the retry backoff below.
	failures := 0
	for {
		// On every iteration take note of what the trigger for the next sync
		// was, so that it may be referenced during the sync itself.
		var reasonForSync syncReason

		select {
		case <-retryTimer.C:
			reasonForSync = syncPeriodic
		case <-c.shutdownCh:
			reasonForSync = syncShutdown
			// Cancel check watcher but sync one last time
			cancel()
		case ops := <-c.opCh:
			reasonForSync = syncNewOps
			c.merge(ops)
		}

		if err := c.sync(reasonForSync); err != nil {
			if failures == 0 {
				// Log on the first failure
				c.logger.Warn("failed to update services in Consul", "error", err)
			} else if failures%10 == 0 {
				// Log every 10th consecutive failure
				c.logger.Error("still unable to update services in Consul", "failures", failures, "error", err)
			}

			failures++
			if !retryTimer.Stop() {
				// Timer already expired, since the timer may
				// or may not have been read in the select{}
				// above, conditionally receive on it
				select {
				case <-retryTimer.C:
				default:
				}
			}
			// Linear backoff: retryInterval per consecutive failure, capped
			// at maxRetryInterval.
			backoff := c.retryInterval * time.Duration(failures)
			if backoff > c.maxRetryInterval {
				backoff = c.maxRetryInterval
			}
			retryTimer.Reset(backoff)
		} else {
			if failures > 0 {
				c.logger.Info("successfully updated services in Consul")
				failures = 0
			}

			// on successful sync, clear deregistered consul entities
			c.clearExplicitlyDeregistered()

			// Reset timer to periodic interval to periodically
			// reconcile with Consul. As above, drain the channel if the
			// timer already fired so that Reset starts from a clean state.
			if !retryTimer.Stop() {
				select {
				case <-retryTimer.C:
				default:
				}
			}
			retryTimer.Reset(c.periodicInterval)
		}

		// Non-blocking check for shutdown after each sync attempt.
		select {
		case <-c.shutdownCh:
			// Exit only after sync'ing all outstanding operations
			if len(c.opCh) > 0 {
				for len(c.opCh) > 0 {
					c.merge(<-c.opCh)
				}
				continue
			}
			return
		default:
		}

	}
}
   684  
// commit operations unless already shutting down.
//
// The operations are logged at trace level, dropped if empty (or nil), and
// otherwise sent on opCh for the Run loop to merge and sync. The send blocks
// until it is accepted or shutdown is signaled, whichever happens first.
func (c *ServiceClient) commit(ops *operations) {
	c.logger.Trace("commit sync operations", "ops", ops)

	// Ignore empty operations - ideally callers will optimize out syncs with
	// nothing to do, but be defensive anyway. Sending an empty ops on the chan
	// will trigger an unnecessary sync with Consul.
	if ops.empty() {
		return
	}

	// Prioritize doing nothing if we are being signaled to shutdown. This
	// separate non-blocking check is needed because the select below picks
	// among ready cases at random.
	select {
	case <-c.shutdownCh:
		return
	default:
	}

	// Send the ops down the ops chan, triggering a sync with Consul. Unless we
	// receive a signal to shutdown.
	select {
	case c.opCh <- ops:
	case <-c.shutdownCh:
	}
}
   710  
   711  func (c *ServiceClient) clearExplicitlyDeregistered() {
   712  	c.explicitlyDeregisteredServices = set.New[string](0)
   713  	c.explicitlyDeregisteredChecks = set.New[string](0)
   714  }
   715  
   716  // merge registrations into state map prior to sync'ing with Consul
   717  func (c *ServiceClient) merge(ops *operations) {
   718  	for _, s := range ops.regServices {
   719  		c.services[s.ID] = s
   720  	}
   721  	for _, check := range ops.regChecks {
   722  		c.checks[check.ID] = check
   723  	}
   724  	for _, sid := range ops.deregServices {
   725  		delete(c.services, sid)
   726  		c.explicitlyDeregisteredServices.Insert(sid)
   727  	}
   728  	for _, cid := range ops.deregChecks {
   729  		delete(c.checks, cid)
   730  		c.explicitlyDeregisteredChecks.Insert(cid)
   731  	}
   732  	metrics.SetGauge([]string{"client", "consul", "services"}, float32(len(c.services)))
   733  	metrics.SetGauge([]string{"client", "consul", "checks"}, float32(len(c.checks)))
   734  }
   735  
// sync reconciles the services and checks tracked locally (c.services and
// c.checks) with what the Consul agent reports across all namespaces.
//
// It performs four passes, in order:
//  1. deregister Nomad managed services present in Consul but not tracked
//     locally,
//  2. register tracked services that are missing from Consul or for which
//     agentServiceUpdateRequired reports an update is needed,
//  3. deregister Nomad managed checks present in Consul but not tracked
//     locally,
//  4. register tracked checks that are missing from Consul.
//
// The first failing Consul call increments the sync_failure counter and
// aborts the sync by returning the error. The only exception is a failure to
// deregister a service or check belonging to an old style Nomad service (see
// isOldNomadService), which is skipped. reason is logged and forwarded to
// agentServiceUpdateRequired.
func (c *ServiceClient) sync(reason syncReason) error {
	c.logger.Trace("execute sync", "reason", reason)

	// Counters of registered/deregistered services and checks, only used for
	// the summary log line at the end.
	sreg, creg, sdereg, cdereg := 0, 0, 0, 0
	var err error

	// Get the list of all namespaces created so we can iterate them.
	namespaces, err := c.namespacesClient.List()
	if err != nil {
		metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
		return fmt.Errorf("failed to query Consul namespaces: %w", err)
	}

	// Accumulate all services in Consul across all namespaces.
	servicesInConsul := make(map[string]*api.AgentService)
	for _, namespace := range namespaces {
		if nsServices, err := c.agentAPI.ServicesWithFilterOpts("", &api.QueryOptions{Namespace: normalizeNamespace(namespace)}); err != nil {
			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return fmt.Errorf("failed to query Consul services: %w", err)
		} else {
			// Merge into a single map keyed by service ID; an ID seen in more
			// than one namespace keeps the entry that was merged last.
			for k, v := range nsServices {
				servicesInConsul[k] = v
			}
		}
	}

	// Compute whether we are still in probation period where we will avoid
	// de-registering services.
	inProbation := time.Now().Before(c.deregisterProbationExpiry)

	// Remove Nomad services in Consul but unknown to Nomad.
	for id := range servicesInConsul {
		if _, ok := c.services[id]; ok {
			// Known service, skip
			continue
		}

		// Ignore if this is not a Nomad managed service. Also ignore
		// Nomad managed services if this is not a client agent.
		// This is to prevent server agents from removing services
		// registered by client agents
		if !isNomadService(id) || !c.isClientAgent {
			// Not managed by Nomad, skip
			continue
		}

		// Ignore unknown services during probation, unless the service was
		// explicitly deregistered through this client.
		if inProbation && !c.explicitlyDeregisteredServices.Contains(id) {
			continue
		}

		// Ignore if this is a service for a Nomad managed sidecar proxy.
		if maybeConnectSidecar(id) {
			continue
		}

		// Get the Consul namespace this service is in.
		ns := servicesInConsul[id].Namespace

		// If this service has a sidecar, we need to remove the sidecar first,
		// otherwise Consul will produce a warning and an error when removing
		// the parent service.
		//
		// The sidecar is not tracked on the Nomad side; it was registered
		// implicitly through the parent service.
		if sidecar := getNomadSidecar(id, servicesInConsul); sidecar != nil {
			if err := c.agentAPI.ServiceDeregisterOpts(sidecar.ID, &api.QueryOptions{Namespace: ns}); err != nil {
				metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
				return err
			}
		}

		// Remove the unwanted service.
		if err := c.agentAPI.ServiceDeregisterOpts(id, &api.QueryOptions{Namespace: ns}); err != nil {
			if isOldNomadService(id) {
				// Don't hard-fail on old entries. See #3620
				continue
			}

			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		sdereg++
		metrics.IncrCounter([]string{"client", "consul", "service_deregistrations"}, 1)
	}

	// Add Nomad managed services missing in Consul, or updated via Nomad.
	for id, serviceInNomad := range c.services {
		serviceInConsul, exists := servicesInConsul[id]
		sidecarInConsul := getNomadSidecar(id, servicesInConsul)

		if !exists || c.agentServiceUpdateRequired(reason, serviceInNomad, serviceInConsul, sidecarInConsul) {
			c.logger.Trace("must register service", "id", id, "exists", exists, "reason", reason)
			if err = c.agentAPI.ServiceRegister(serviceInNomad); err != nil {
				metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
				return err
			}
			sreg++
			metrics.IncrCounter([]string{"client", "consul", "service_registrations"}, 1)
		}

	}

	// Accumulate all checks in Consul across all namespaces. This is queried
	// after the service passes above have run.
	checksInConsul := make(map[string]*api.AgentCheck)
	for _, namespace := range namespaces {
		nsChecks, err := c.agentAPI.ChecksWithFilterOpts("", &api.QueryOptions{Namespace: normalizeNamespace(namespace)})
		if err != nil {
			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return fmt.Errorf("failed to query Consul checks: %w", err)
		}
		for k, v := range nsChecks {
			checksInConsul[k] = v
		}
	}

	// Remove Nomad checks in Consul but unknown locally
	for id, check := range checksInConsul {
		if _, ok := c.checks[id]; ok {
			// Known check, leave it
			continue
		}

		// Ignore if this is not a Nomad managed check. Also ignore
		// Nomad managed checks if this is not a client agent.
		// This is to prevent server agents from removing checks
		// registered by client agents
		if !isNomadService(check.ServiceID) || !c.isClientAgent || !isNomadCheck(check.CheckID) {
			// Service not managed by Nomad, skip
			continue
		}

		// Ignore unknown checks during probation, unless the check was
		// explicitly deregistered through this client.
		if inProbation && !c.explicitlyDeregisteredChecks.Contains(id) {
			continue
		}

		// Ignore if this is a check for a Nomad managed sidecar proxy.
		if maybeSidecarProxyCheck(id) {
			continue
		}

		// Unknown Nomad managed check; remove
		if err := c.agentAPI.CheckDeregisterOpts(id, &api.QueryOptions{Namespace: check.Namespace}); err != nil {
			if isOldNomadService(check.ServiceID) {
				// Don't hard-fail on old entries.
				continue
			}

			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		cdereg++
		metrics.IncrCounter([]string{"client", "consul", "check_deregistrations"}, 1)
	}

	// Add Nomad checks missing from Consul
	for id, check := range c.checks {
		if _, ok := checksInConsul[id]; ok {
			// Already in Consul; skipping
			continue
		}
		if err := c.agentAPI.CheckRegister(check); err != nil {
			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		creg++
		metrics.IncrCounter([]string{"client", "consul", "check_registrations"}, 1)
	}

	// Only log if something was actually synced
	if sreg > 0 || sdereg > 0 || creg > 0 || cdereg > 0 {
		c.logger.Debug("sync complete", "registered_services", sreg, "deregistered_services", sdereg,
			"registered_checks", creg, "deregistered_checks", cdereg)
	}
	return nil
}
   913  
   914  // RegisterAgent registers Nomad agents (client or server). The
   915  // Service.PortLabel should be a literal port to be parsed with SplitHostPort.
   916  // Script checks are not supported and will return an error. Registration is
   917  // asynchronous.
   918  //
   919  // Agents will be deregistered when Shutdown is called.
   920  //
   921  // Note: no need to manually plumb Consul namespace into the agent service registration
   922  // or its check registrations, because the Nomad Client's Consul Client will already
   923  // have the Nomad Client's Consul Namespace set on startup.
   924  func (c *ServiceClient) RegisterAgent(role string, services []*structs.Service) error {
   925  	ops := operations{}
   926  
   927  	for _, service := range services {
   928  		id := makeAgentServiceID(role, service)
   929  
   930  		// Unlike tasks, agents don't use port labels. Agent ports are
   931  		// stored directly in the PortLabel.
   932  		host, rawport, err := net.SplitHostPort(service.PortLabel)
   933  		if err != nil {
   934  			return fmt.Errorf("error parsing port label %q from service %q: %v", service.PortLabel, service.Name, err)
   935  		}
   936  		port, err := strconv.Atoi(rawport)
   937  		if err != nil {
   938  			return fmt.Errorf("error parsing port %q from service %q: %v", rawport, service.Name, err)
   939  		}
   940  		serviceReg := &api.AgentServiceRegistration{
   941  			ID:      id,
   942  			Name:    service.Name,
   943  			Tags:    service.Tags,
   944  			Address: host,
   945  			Port:    port,
   946  			// This enables the consul UI to show that Nomad registered this service
   947  			Meta: map[string]string{
   948  				"external-source": "nomad",
   949  			},
   950  		}
   951  		ops.regServices = append(ops.regServices, serviceReg)
   952  
   953  		for _, check := range service.Checks {
   954  			checkID := MakeCheckID(id, check)
   955  			if check.Type == structs.ServiceCheckScript {
   956  				return fmt.Errorf("service %q contains invalid check: agent checks do not support scripts", service.Name)
   957  			}
   958  			checkHost, checkPort := serviceReg.Address, serviceReg.Port
   959  			if check.PortLabel != "" {
   960  				// Unlike tasks, agents don't use port labels. Agent ports are
   961  				// stored directly in the PortLabel.
   962  				host, rawport, err := net.SplitHostPort(check.PortLabel)
   963  				if err != nil {
   964  					return fmt.Errorf("error parsing port label %q from check %q: %v", service.PortLabel, check.Name, err)
   965  				}
   966  				port, err := strconv.Atoi(rawport)
   967  				if err != nil {
   968  					return fmt.Errorf("error parsing port %q from check %q: %v", rawport, check.Name, err)
   969  				}
   970  				checkHost, checkPort = host, port
   971  			}
   972  			checkReg, err := createCheckReg(id, checkID, check, checkHost, checkPort, "")
   973  			if err != nil {
   974  				return fmt.Errorf("failed to add check %q: %v", check.Name, err)
   975  			}
   976  			ops.regChecks = append(ops.regChecks, checkReg)
   977  		}
   978  	}
   979  
   980  	// Don't bother committing agent checks if we're already shutting down
   981  	c.agentLock.Lock()
   982  	defer c.agentLock.Unlock()
   983  	select {
   984  	case <-c.shutdownCh:
   985  		return nil
   986  	default:
   987  	}
   988  
   989  	// Now add them to the registration queue
   990  	c.commit(&ops)
   991  
   992  	// Record IDs for deregistering on shutdown
   993  	for _, id := range ops.regServices {
   994  		c.agentServices.Insert(id.ID)
   995  	}
   996  	for _, id := range ops.regChecks {
   997  		c.agentChecks.Insert(id.ID)
   998  	}
   999  	return nil
  1000  }
  1001  
  1002  // serviceRegs creates service registrations, check registrations, and script
  1003  // checks from a service. It returns a service registration object with the
  1004  // service and check IDs populated.
  1005  func (c *ServiceClient) serviceRegs(
  1006  	ops *operations,
  1007  	service *structs.Service,
  1008  	workload *serviceregistration.WorkloadServices,
  1009  ) (*serviceregistration.ServiceRegistration, error) {
  1010  
  1011  	// Get the services ID
  1012  	id := serviceregistration.MakeAllocServiceID(workload.AllocInfo.AllocID, workload.Name(), service)
  1013  	sreg := &serviceregistration.ServiceRegistration{
  1014  		ServiceID:     id,
  1015  		CheckIDs:      make(map[string]struct{}, len(service.Checks)),
  1016  		CheckOnUpdate: make(map[string]string, len(service.Checks)),
  1017  	}
  1018  
  1019  	// Service address modes default to auto
  1020  	addrMode := service.AddressMode
  1021  	if addrMode == "" {
  1022  		addrMode = structs.AddressModeAuto
  1023  	}
  1024  
  1025  	// Determine the address to advertise based on the mode
  1026  	ip, port, err := serviceregistration.GetAddress(
  1027  		service.Address, addrMode, service.PortLabel, workload.Networks, workload.DriverNetwork, workload.Ports, workload.NetworkStatus)
  1028  	if err != nil {
  1029  		return nil, fmt.Errorf("unable to get address for service %q: %v", service.Name, err)
  1030  	}
  1031  
  1032  	// Determine whether to use tags or canary_tags
  1033  	var tags []string
  1034  	if workload.Canary && len(service.CanaryTags) > 0 {
  1035  		tags = make([]string, len(service.CanaryTags))
  1036  		copy(tags, service.CanaryTags)
  1037  	} else {
  1038  		tags = make([]string, len(service.Tags))
  1039  		copy(tags, service.Tags)
  1040  	}
  1041  
  1042  	// newConnect returns (nil, nil) if there's no Connect-enabled service.
  1043  	connect, err := newConnect(id, workload.AllocInfo, service.Name, service.Connect, workload.Networks, workload.Ports)
  1044  	if err != nil {
  1045  		return nil, fmt.Errorf("invalid Consul Connect configuration for service %q: %v", service.Name, err)
  1046  	}
  1047  
  1048  	// newConnectGateway returns nil if there's no Connect gateway.
  1049  	gateway := newConnectGateway(service.Connect)
  1050  
  1051  	// Determine whether to use meta or canary_meta
  1052  	var meta map[string]string
  1053  	if workload.Canary && len(service.CanaryMeta) > 0 {
  1054  		meta = make(map[string]string, len(service.CanaryMeta)+1)
  1055  		for k, v := range service.CanaryMeta {
  1056  			meta[k] = v
  1057  		}
  1058  	} else {
  1059  		meta = make(map[string]string, len(service.Meta)+1)
  1060  		for k, v := range service.Meta {
  1061  			meta[k] = v
  1062  		}
  1063  	}
  1064  
  1065  	// This enables the consul UI to show that Nomad registered this service
  1066  	meta["external-source"] = "nomad"
  1067  
  1068  	// Explicitly set the Consul service Kind in case this service represents
  1069  	// one of the Connect gateway types.
  1070  	kind := api.ServiceKindTypical
  1071  	switch {
  1072  	case service.Connect.IsIngress():
  1073  		kind = api.ServiceKindIngressGateway
  1074  	case service.Connect.IsTerminating():
  1075  		kind = api.ServiceKindTerminatingGateway
  1076  
  1077  		if proxy := service.Connect.Gateway.Proxy; proxy != nil {
  1078  			// set the default port if bridge / default listener set
  1079  			if defaultBind, exists := proxy.EnvoyGatewayBindAddresses["default"]; exists {
  1080  				portLabel := envoy.PortLabel(structs.ConnectTerminatingPrefix, service.Name, "")
  1081  				if dynPort, ok := workload.Ports.Get(portLabel); ok {
  1082  					defaultBind.Port = dynPort.Value
  1083  				}
  1084  			}
  1085  		}
  1086  	case service.Connect.IsMesh():
  1087  		kind = api.ServiceKindMeshGateway
  1088  
  1089  		if proxy := service.Connect.Gateway.Proxy; proxy != nil {
  1090  			// wan uses the service port label, which is typically on a discrete host_network
  1091  			if wanBind, exists := proxy.EnvoyGatewayBindAddresses["wan"]; exists {
  1092  				if wanPort, ok := workload.Ports.Get(service.PortLabel); ok {
  1093  					wanBind.Port = wanPort.Value
  1094  				}
  1095  			}
  1096  			// lan uses a nomad generated dynamic port on the default network
  1097  			if lanBind, exists := proxy.EnvoyGatewayBindAddresses["lan"]; exists {
  1098  				portLabel := envoy.PortLabel(structs.ConnectMeshPrefix, service.Name, "lan")
  1099  				if dynPort, ok := workload.Ports.Get(portLabel); ok {
  1100  					lanBind.Port = dynPort.Value
  1101  				}
  1102  			}
  1103  		}
  1104  	}
  1105  
  1106  	taggedAddresses, err := parseTaggedAddresses(service.TaggedAddresses, port)
  1107  	if err != nil {
  1108  		return nil, err
  1109  	}
  1110  
  1111  	// Build the Consul Service registration request
  1112  	serviceReg := &api.AgentServiceRegistration{
  1113  		Kind:              kind,
  1114  		ID:                id,
  1115  		Name:              service.Name,
  1116  		Namespace:         workload.ProviderNamespace,
  1117  		Tags:              tags,
  1118  		EnableTagOverride: service.EnableTagOverride,
  1119  		Address:           ip,
  1120  		Port:              port,
  1121  		Meta:              meta,
  1122  		TaggedAddresses:   taggedAddresses,
  1123  		Connect:           connect, // will be nil if no Connect stanza
  1124  		Proxy:             gateway, // will be nil if no Connect Gateway stanza
  1125  		Checks:            make([]*api.AgentServiceCheck, 0, len(service.Checks)),
  1126  	}
  1127  	ops.regServices = append(ops.regServices, serviceReg)
  1128  
  1129  	// Build the check registrations
  1130  	checkRegs, err := c.checkRegs(id, service, workload, sreg)
  1131  	if err != nil {
  1132  		return nil, err
  1133  	}
  1134  
  1135  	for _, registration := range checkRegs {
  1136  		sreg.CheckIDs[registration.ID] = struct{}{}
  1137  		ops.regChecks = append(ops.regChecks, registration)
  1138  		serviceReg.Checks = append(
  1139  			serviceReg.Checks,
  1140  			apiCheckRegistrationToCheck(registration),
  1141  		)
  1142  	}
  1143  
  1144  	return sreg, nil
  1145  }
  1146  
  1147  // apiCheckRegistrationToCheck converts a check registration to a check, so that
  1148  // we can include them in the initial service registration. It is expected the
  1149  // Nomad-conversion (e.g. turning script checks into ttl checks) has already been
  1150  // applied.
  1151  func apiCheckRegistrationToCheck(r *api.AgentCheckRegistration) *api.AgentServiceCheck {
  1152  	return &api.AgentServiceCheck{
  1153  		CheckID:                r.ID,
  1154  		Name:                   r.Name,
  1155  		Interval:               r.Interval,
  1156  		Timeout:                r.Timeout,
  1157  		TTL:                    r.TTL,
  1158  		HTTP:                   r.HTTP,
  1159  		Header:                 maps.Clone(r.Header),
  1160  		Method:                 r.Method,
  1161  		Body:                   r.Body,
  1162  		TCP:                    r.TCP,
  1163  		Status:                 r.Status,
  1164  		TLSSkipVerify:          r.TLSSkipVerify,
  1165  		GRPC:                   r.GRPC,
  1166  		GRPCUseTLS:             r.GRPCUseTLS,
  1167  		SuccessBeforePassing:   r.SuccessBeforePassing,
  1168  		FailuresBeforeCritical: r.FailuresBeforeCritical,
  1169  	}
  1170  }
  1171  
  1172  // checkRegs creates check registrations for the given service
  1173  func (c *ServiceClient) checkRegs(
  1174  	serviceID string,
  1175  	service *structs.Service,
  1176  	workload *serviceregistration.WorkloadServices,
  1177  	sreg *serviceregistration.ServiceRegistration,
  1178  ) ([]*api.AgentCheckRegistration, error) {
  1179  
  1180  	registrations := make([]*api.AgentCheckRegistration, 0, len(service.Checks))
  1181  	for _, check := range service.Checks {
  1182  		var ip string
  1183  		var port int
  1184  
  1185  		if check.Type != structs.ServiceCheckScript {
  1186  			portLabel := check.PortLabel
  1187  			if portLabel == "" {
  1188  				portLabel = service.PortLabel
  1189  			}
  1190  
  1191  			addrMode := check.AddressMode
  1192  			if addrMode == "" {
  1193  				if service.Address != "" {
  1194  					// if the service is using a custom address, enable the check
  1195  					// to use that address
  1196  					addrMode = structs.AddressModeAuto
  1197  				} else {
  1198  					// otherwise default to the host address
  1199  					addrMode = structs.AddressModeHost
  1200  				}
  1201  			}
  1202  
  1203  			var err error
  1204  			ip, port, err = serviceregistration.GetAddress(
  1205  				service.Address, addrMode, portLabel, workload.Networks, workload.DriverNetwork, workload.Ports, workload.NetworkStatus)
  1206  			if err != nil {
  1207  				return nil, fmt.Errorf("error getting address for check %q: %v", check.Name, err)
  1208  			}
  1209  		}
  1210  
  1211  		checkID := MakeCheckID(serviceID, check)
  1212  		registration, err := createCheckReg(serviceID, checkID, check, ip, port, workload.ProviderNamespace)
  1213  		if err != nil {
  1214  			return nil, fmt.Errorf("failed to add check %q: %v", check.Name, err)
  1215  		}
  1216  		sreg.CheckOnUpdate[checkID] = check.OnUpdate
  1217  		registrations = append(registrations, registration)
  1218  	}
  1219  
  1220  	return registrations, nil
  1221  }
  1222  
  1223  // RegisterWorkload with Consul. Adds all service entries and checks to Consul.
  1224  //
  1225  // If the service IP is set it used as the address in the service registration.
  1226  // Checks will always use the IP from the Task struct (host's IP).
  1227  //
  1228  // Actual communication with Consul is done asynchronously (see Run).
  1229  func (c *ServiceClient) RegisterWorkload(workload *serviceregistration.WorkloadServices) error {
  1230  	// Fast path
  1231  	numServices := len(workload.Services)
  1232  	if numServices == 0 {
  1233  		return nil
  1234  	}
  1235  
  1236  	t := new(serviceregistration.ServiceRegistrations)
  1237  	t.Services = make(map[string]*serviceregistration.ServiceRegistration, numServices)
  1238  
  1239  	ops := &operations{}
  1240  	for _, service := range workload.Services {
  1241  		sreg, err := c.serviceRegs(ops, service, workload)
  1242  		if err != nil {
  1243  			return err
  1244  		}
  1245  		t.Services[sreg.ServiceID] = sreg
  1246  	}
  1247  
  1248  	// Add the workload to the allocation's registration
  1249  	c.addRegistrations(workload.AllocInfo.AllocID, workload.Name(), t)
  1250  
  1251  	c.commit(ops)
  1252  
  1253  	// Start watching checks. Done after service registrations are built
  1254  	// since an error building them could leak watches.
  1255  	for _, service := range workload.Services {
  1256  		serviceID := serviceregistration.MakeAllocServiceID(workload.AllocInfo.AllocID, workload.Name(), service)
  1257  		for _, check := range service.Checks {
  1258  			if check.TriggersRestarts() {
  1259  				checkID := MakeCheckID(serviceID, check)
  1260  				c.checkWatcher.Watch(workload.AllocInfo.AllocID, workload.Name(), checkID, check, workload.Restarter)
  1261  			}
  1262  		}
  1263  	}
  1264  	return nil
  1265  }
  1266  
  1267  // UpdateWorkload in Consul. Does not alter the service if only checks have
  1268  // changed.
  1269  //
  1270  // DriverNetwork must not change between invocations for the same allocation.
  1271  func (c *ServiceClient) UpdateWorkload(old, newWorkload *serviceregistration.WorkloadServices) error {
  1272  	ops := new(operations)
  1273  	regs := new(serviceregistration.ServiceRegistrations)
  1274  	regs.Services = make(map[string]*serviceregistration.ServiceRegistration, len(newWorkload.Services))
  1275  
  1276  	newIDs := make(map[string]*structs.Service, len(newWorkload.Services))
  1277  	for _, s := range newWorkload.Services {
  1278  		newIDs[serviceregistration.MakeAllocServiceID(newWorkload.AllocInfo.AllocID, newWorkload.Name(), s)] = s
  1279  	}
  1280  
  1281  	// Loop over existing Services to see if they have been removed
  1282  	for _, existingSvc := range old.Services {
  1283  		existingID := serviceregistration.MakeAllocServiceID(old.AllocInfo.AllocID, old.Name(), existingSvc)
  1284  		newSvc, ok := newIDs[existingID]
  1285  
  1286  		if !ok {
  1287  			// Existing service entry removed
  1288  			ops.deregServices = append(ops.deregServices, existingID)
  1289  			for _, check := range existingSvc.Checks {
  1290  				cid := MakeCheckID(existingID, check)
  1291  				ops.deregChecks = append(ops.deregChecks, cid)
  1292  
  1293  				// Unwatch watched checks
  1294  				if check.TriggersRestarts() {
  1295  					c.checkWatcher.Unwatch(cid)
  1296  				}
  1297  			}
  1298  			continue
  1299  		}
  1300  
  1301  		oldHash := existingSvc.Hash(old.AllocInfo.AllocID, old.Name(), old.Canary)
  1302  		newHash := newSvc.Hash(newWorkload.AllocInfo.AllocID, newWorkload.Name(), newWorkload.Canary)
  1303  		if oldHash == newHash {
  1304  			// Service exists and hasn't changed, don't re-add it later
  1305  			delete(newIDs, existingID)
  1306  		}
  1307  
  1308  		// Service still exists so add it to the task's registration
  1309  		sreg := &serviceregistration.ServiceRegistration{
  1310  			ServiceID:     existingID,
  1311  			CheckIDs:      make(map[string]struct{}, len(newSvc.Checks)),
  1312  			CheckOnUpdate: make(map[string]string, len(newSvc.Checks)),
  1313  		}
  1314  		regs.Services[existingID] = sreg
  1315  
  1316  		// See if any checks were updated
  1317  		existingChecks := make(map[string]*structs.ServiceCheck, len(existingSvc.Checks))
  1318  		for _, check := range existingSvc.Checks {
  1319  			existingChecks[MakeCheckID(existingID, check)] = check
  1320  		}
  1321  
  1322  		// Register new checks
  1323  		for _, check := range newSvc.Checks {
  1324  			checkID := MakeCheckID(existingID, check)
  1325  			if _, exists := existingChecks[checkID]; exists {
  1326  				// Check is still required. Remove it from the map so it doesn't get
  1327  				// deleted later.
  1328  				delete(existingChecks, checkID)
  1329  				sreg.CheckIDs[checkID] = struct{}{}
  1330  				sreg.CheckOnUpdate[checkID] = check.OnUpdate
  1331  			}
  1332  
  1333  			// New check on an unchanged service; add them now
  1334  			checkRegs, err := c.checkRegs(existingID, newSvc, newWorkload, sreg)
  1335  			if err != nil {
  1336  				return err
  1337  			}
  1338  
  1339  			for _, registration := range checkRegs {
  1340  				sreg.CheckIDs[registration.ID] = struct{}{}
  1341  				sreg.CheckOnUpdate[registration.ID] = check.OnUpdate
  1342  				ops.regChecks = append(ops.regChecks, registration)
  1343  			}
  1344  
  1345  			// Update all watched checks as CheckRestart fields aren't part of ID
  1346  			if check.TriggersRestarts() {
  1347  				c.checkWatcher.Watch(newWorkload.AllocInfo.AllocID, newWorkload.Name(), checkID, check, newWorkload.Restarter)
  1348  			}
  1349  		}
  1350  
  1351  		// Remove existing checks not in updated service
  1352  		for cid, check := range existingChecks {
  1353  			ops.deregChecks = append(ops.deregChecks, cid)
  1354  
  1355  			// Unwatch checks
  1356  			if check.TriggersRestarts() {
  1357  				c.checkWatcher.Unwatch(cid)
  1358  			}
  1359  		}
  1360  	}
  1361  
  1362  	// Any remaining services should just be enqueued directly
  1363  	for _, newSvc := range newIDs {
  1364  		sreg, err := c.serviceRegs(ops, newSvc, newWorkload)
  1365  		if err != nil {
  1366  			return err
  1367  		}
  1368  
  1369  		regs.Services[sreg.ServiceID] = sreg
  1370  	}
  1371  
  1372  	// Add the task to the allocation's registration
  1373  	c.addRegistrations(newWorkload.AllocInfo.AllocID, newWorkload.Name(), regs)
  1374  
  1375  	c.commit(ops)
  1376  
  1377  	// Start watching checks. Done after service registrations are built
  1378  	// since an error building them could leak watches.
  1379  	for serviceID, service := range newIDs {
  1380  		for _, check := range service.Checks {
  1381  			if check.TriggersRestarts() {
  1382  				checkID := MakeCheckID(serviceID, check)
  1383  				c.checkWatcher.Watch(newWorkload.AllocInfo.AllocID, newWorkload.Name(), checkID, check, newWorkload.Restarter)
  1384  			}
  1385  		}
  1386  	}
  1387  
  1388  	return nil
  1389  }
  1390  
  1391  // RemoveWorkload from Consul. Removes all service entries and checks.
  1392  //
  1393  // Actual communication with Consul is done asynchronously (see Run).
  1394  func (c *ServiceClient) RemoveWorkload(workload *serviceregistration.WorkloadServices) {
  1395  	ops := operations{}
  1396  
  1397  	for _, service := range workload.Services {
  1398  		id := serviceregistration.MakeAllocServiceID(workload.AllocInfo.AllocID, workload.Name(), service)
  1399  		ops.deregServices = append(ops.deregServices, id)
  1400  
  1401  		for _, check := range service.Checks {
  1402  			cid := MakeCheckID(id, check)
  1403  			ops.deregChecks = append(ops.deregChecks, cid)
  1404  
  1405  			if check.TriggersRestarts() {
  1406  				c.checkWatcher.Unwatch(cid)
  1407  			}
  1408  		}
  1409  	}
  1410  
  1411  	// Remove the workload from the alloc's registrations
  1412  	c.removeRegistration(workload.AllocInfo.AllocID, workload.Name())
  1413  
  1414  	// Now add them to the deregistration fields; main Run loop will update
  1415  	c.commit(&ops)
  1416  }
  1417  
  1418  // normalizeNamespace will turn the "default" namespace into the empty string,
  1419  // so that Consul OSS will not produce an error setting something in the default
  1420  // namespace.
  1421  func normalizeNamespace(namespace string) string {
  1422  	if namespace == "default" {
  1423  		return ""
  1424  	}
  1425  	return namespace
  1426  }
  1427  
  1428  // AllocRegistrations returns the registrations for the given allocation. If the
  1429  // allocation has no registrations, the response is a nil object.
  1430  func (c *ServiceClient) AllocRegistrations(allocID string) (*serviceregistration.AllocRegistration, error) {
  1431  	// Get the internal struct using the lock
  1432  	c.allocRegistrationsLock.RLock()
  1433  	regInternal, ok := c.allocRegistrations[allocID]
  1434  	if !ok {
  1435  		c.allocRegistrationsLock.RUnlock()
  1436  		return nil, nil
  1437  	}
  1438  
  1439  	// Copy so we don't expose internal structs
  1440  	reg := regInternal.Copy()
  1441  	c.allocRegistrationsLock.RUnlock()
  1442  
  1443  	// Get the list of all namespaces created so we can iterate them.
  1444  	namespaces, err := c.namespacesClient.List()
  1445  	if err != nil {
  1446  		return nil, fmt.Errorf("failed to retrieve namespaces from consul: %w", err)
  1447  	}
  1448  
  1449  	services := make(map[string]*api.AgentService)
  1450  	checks := make(map[string]*api.AgentCheck)
  1451  
  1452  	// Query the services and checks to populate the allocation registrations.
  1453  	for _, namespace := range namespaces {
  1454  		nsServices, err := c.agentAPI.ServicesWithFilterOpts("", &api.QueryOptions{Namespace: normalizeNamespace(namespace)})
  1455  		if err != nil {
  1456  			return nil, fmt.Errorf("failed to retrieve services from consul: %w", err)
  1457  		}
  1458  		for k, v := range nsServices {
  1459  			services[k] = v
  1460  		}
  1461  
  1462  		nsChecks, err := c.agentAPI.ChecksWithFilterOpts("", &api.QueryOptions{Namespace: normalizeNamespace(namespace)})
  1463  		if err != nil {
  1464  			return nil, fmt.Errorf("failed to retrieve checks from consul: %w", err)
  1465  		}
  1466  		for k, v := range nsChecks {
  1467  			checks[k] = v
  1468  		}
  1469  	}
  1470  
  1471  	// Populate the object
  1472  	for _, treg := range reg.Tasks {
  1473  		for serviceID, sreg := range treg.Services {
  1474  			sreg.Service = services[serviceID]
  1475  			for checkID := range sreg.CheckIDs {
  1476  				if check, ok := checks[checkID]; ok {
  1477  					sreg.Checks = append(sreg.Checks, check)
  1478  				}
  1479  			}
  1480  		}
  1481  	}
  1482  
  1483  	return reg, nil
  1484  }
  1485  
  1486  // UpdateTTL is used to update the TTL of a check. Typically this will only be
  1487  // called to heartbeat script checks.
  1488  func (c *ServiceClient) UpdateTTL(id, namespace, output, status string) error {
  1489  	ns := normalizeNamespace(namespace)
  1490  	return c.agentAPI.UpdateTTLOpts(id, output, status, &api.QueryOptions{Namespace: ns})
  1491  }
  1492  
  1493  // Shutdown the Consul client. Update running task registrations and deregister
  1494  // agent from Consul. On first call blocks up to shutdownWait before giving up
  1495  // on syncing operations.
  1496  func (c *ServiceClient) Shutdown() error {
  1497  	// Serialize Shutdown calls with RegisterAgent to prevent leaking agent
  1498  	// entries.
  1499  	c.agentLock.Lock()
  1500  	defer c.agentLock.Unlock()
  1501  	select {
  1502  	case <-c.shutdownCh:
  1503  		return nil
  1504  	default:
  1505  		close(c.shutdownCh)
  1506  	}
  1507  
  1508  	// Give run loop time to sync, but don't block indefinitely
  1509  	deadline := time.After(c.shutdownWait)
  1510  
  1511  	// Wait for Run to finish any outstanding operations and exit
  1512  	select {
  1513  	case <-c.exitCh:
  1514  	case <-deadline:
  1515  		// Don't wait forever though
  1516  	}
  1517  
  1518  	// If Consul was never seen nothing could be written so exit early
  1519  	if !c.hasSeen() {
  1520  		return nil
  1521  	}
  1522  
  1523  	// Always attempt to deregister Nomad agent Consul entries, even if
  1524  	// deadline was reached
  1525  	for _, id := range c.agentServices.List() {
  1526  		if err := c.agentAPI.ServiceDeregisterOpts(id, nil); err != nil {
  1527  			c.logger.Error("failed deregistering agent service", "service_id", id, "error", err)
  1528  		}
  1529  	}
  1530  
  1531  	namespaces, err := c.namespacesClient.List()
  1532  	if err != nil {
  1533  		c.logger.Error("failed to retrieve namespaces from consul", "error", err)
  1534  	}
  1535  
  1536  	remainingChecks := make(map[string]*api.AgentCheck)
  1537  	for _, namespace := range namespaces {
  1538  		nsChecks, err := c.agentAPI.ChecksWithFilterOpts("", &api.QueryOptions{Namespace: normalizeNamespace(namespace)})
  1539  		if err != nil {
  1540  			c.logger.Error("failed to retrieve checks from consul", "error", err)
  1541  		}
  1542  		for k, v := range nsChecks {
  1543  			remainingChecks[k] = v
  1544  		}
  1545  	}
  1546  
  1547  	checkRemains := func(id string) bool {
  1548  		for _, c := range remainingChecks {
  1549  			if c.CheckID == id {
  1550  				return true
  1551  			}
  1552  		}
  1553  		return false
  1554  	}
  1555  
  1556  	for _, id := range c.agentChecks.List() {
  1557  		// if we couldn't populate remainingChecks it is unlikely that CheckDeregister will work, but try anyway
  1558  		// if we could list the remaining checks, verify that the check we store still exists before removing it.
  1559  		if remainingChecks == nil || checkRemains(id) {
  1560  			ns := remainingChecks[id].Namespace
  1561  			if err := c.agentAPI.CheckDeregisterOpts(id, &api.QueryOptions{Namespace: ns}); err != nil {
  1562  				c.logger.Error("failed deregistering agent check", "check_id", id, "error", err)
  1563  			}
  1564  		}
  1565  	}
  1566  
  1567  	return nil
  1568  }
  1569  
  1570  // addRegistration adds the service registrations for the given allocation.
  1571  func (c *ServiceClient) addRegistrations(allocID, taskName string, reg *serviceregistration.ServiceRegistrations) {
  1572  	c.allocRegistrationsLock.Lock()
  1573  	defer c.allocRegistrationsLock.Unlock()
  1574  
  1575  	alloc, ok := c.allocRegistrations[allocID]
  1576  	if !ok {
  1577  		alloc = &serviceregistration.AllocRegistration{
  1578  			Tasks: make(map[string]*serviceregistration.ServiceRegistrations),
  1579  		}
  1580  		c.allocRegistrations[allocID] = alloc
  1581  	}
  1582  	alloc.Tasks[taskName] = reg
  1583  }
  1584  
  1585  // removeRegistrations removes the registration for the given allocation.
  1586  func (c *ServiceClient) removeRegistration(allocID, taskName string) {
  1587  	c.allocRegistrationsLock.Lock()
  1588  	defer c.allocRegistrationsLock.Unlock()
  1589  
  1590  	alloc, ok := c.allocRegistrations[allocID]
  1591  	if !ok {
  1592  		return
  1593  	}
  1594  
  1595  	// Delete the task and if it is the last one also delete the alloc's
  1596  	// registration
  1597  	delete(alloc.Tasks, taskName)
  1598  	if len(alloc.Tasks) == 0 {
  1599  		delete(c.allocRegistrations, allocID)
  1600  	}
  1601  }
  1602  
  1603  // makeAgentServiceID creates a unique ID for identifying an agent service in
  1604  // Consul.
  1605  //
  1606  // Agent service IDs are of the form:
  1607  //
  1608  //	{nomadServicePrefix}-{ROLE}-b32(sha1({Service.Name}-{Service.Tags...})
  1609  //	Example Server ID: _nomad-server-fbbk265qn4tmt25nd4ep42tjvmyj3hr4
  1610  //	Example Client ID: _nomad-client-ggnjpgl7yn7rgmvxzilmpvrzzvrszc7l
  1611  func makeAgentServiceID(role string, service *structs.Service) string {
  1612  	return fmt.Sprintf("%s-%s-%s", nomadServicePrefix, role, service.Hash(role, "", false))
  1613  }
  1614  
  1615  // MakeCheckID creates a unique ID for a check.
  1616  //
  1617  //	Example Check ID: _nomad-check-434ae42f9a57c5705344974ac38de2aee0ee089d
  1618  func MakeCheckID(serviceID string, check *structs.ServiceCheck) string {
  1619  	return fmt.Sprintf("%s%s", nomadCheckPrefix, check.Hash(serviceID))
  1620  }
  1621  
  1622  // createCheckReg creates a Check that can be registered with Consul.
  1623  //
  1624  // Script checks simply have a TTL set and the caller is responsible for
  1625  // running the script and heart-beating.
  1626  func createCheckReg(serviceID, checkID string, check *structs.ServiceCheck, host string, port int, namespace string) (*api.AgentCheckRegistration, error) {
  1627  	chkReg := api.AgentCheckRegistration{
  1628  		ID:        checkID,
  1629  		Name:      check.Name,
  1630  		ServiceID: serviceID,
  1631  		Namespace: normalizeNamespace(namespace),
  1632  	}
  1633  	chkReg.Status = check.InitialStatus
  1634  	chkReg.Timeout = check.Timeout.String()
  1635  	chkReg.Interval = check.Interval.String()
  1636  	chkReg.SuccessBeforePassing = check.SuccessBeforePassing
  1637  	chkReg.FailuresBeforeCritical = check.FailuresBeforeCritical
  1638  
  1639  	// Require an address for http or tcp checks
  1640  	if port == 0 && check.RequiresPort() {
  1641  		return nil, fmt.Errorf("%s checks require an address", check.Type)
  1642  	}
  1643  
  1644  	switch check.Type {
  1645  	case structs.ServiceCheckHTTP:
  1646  		proto := check.Protocol
  1647  		if proto == "" {
  1648  			proto = "http"
  1649  		}
  1650  		if check.TLSSkipVerify {
  1651  			chkReg.TLSSkipVerify = true
  1652  		}
  1653  		base := url.URL{
  1654  			Scheme: proto,
  1655  			Host:   net.JoinHostPort(host, strconv.Itoa(port)),
  1656  		}
  1657  		relative, err := url.Parse(check.Path)
  1658  		if err != nil {
  1659  			return nil, err
  1660  		}
  1661  		checkURL := base.ResolveReference(relative)
  1662  		chkReg.HTTP = checkURL.String()
  1663  		chkReg.Method = check.Method
  1664  		chkReg.Header = check.Header
  1665  		chkReg.Body = check.Body
  1666  
  1667  	case structs.ServiceCheckTCP:
  1668  		chkReg.TCP = net.JoinHostPort(host, strconv.Itoa(port))
  1669  
  1670  	case structs.ServiceCheckScript:
  1671  		chkReg.TTL = (check.Interval + ttlCheckBuffer).String()
  1672  		// As of Consul 1.0.0 setting TTL and Interval is a 400
  1673  		chkReg.Interval = ""
  1674  
  1675  	case structs.ServiceCheckGRPC:
  1676  		chkReg.GRPC = fmt.Sprintf("%s/%s", net.JoinHostPort(host, strconv.Itoa(port)), check.GRPCService)
  1677  		chkReg.GRPCUseTLS = check.GRPCUseTLS
  1678  		if check.TLSSkipVerify {
  1679  			chkReg.TLSSkipVerify = true
  1680  		}
  1681  
  1682  	default:
  1683  		return nil, fmt.Errorf("check type %+q not valid", check.Type)
  1684  	}
  1685  	return &chkReg, nil
  1686  }
  1687  
  1688  // isNomadClient returns true if id represents a Nomad Client registration.
  1689  func isNomadClient(id string) bool {
  1690  	return strings.HasPrefix(id, nomadClientPrefix)
  1691  }
  1692  
  1693  // isNomadServer returns true if id represents a Nomad Server registration.
  1694  func isNomadServer(id string) bool {
  1695  	return strings.HasPrefix(id, nomadServerPrefix)
  1696  }
  1697  
  1698  // isNomadAgent returns true if id represents a Nomad Client or Server registration.
  1699  func isNomadAgent(id string) bool {
  1700  	return isNomadClient(id) || isNomadServer(id)
  1701  }
  1702  
  1703  // isNomadService returns true if the ID matches the pattern of a Nomad managed
  1704  // service (new or old formats). Agent services return false as independent
  1705  // client and server agents may be running on the same machine. #2827
  1706  func isNomadService(id string) bool {
  1707  	return strings.HasPrefix(id, nomadTaskPrefix) || isOldNomadService(id)
  1708  }
  1709  
  1710  // isNomadCheck returns true if the ID matches the pattern of a Nomad managed
  1711  // check.
  1712  func isNomadCheck(id string) bool {
  1713  	return strings.HasPrefix(id, nomadCheckPrefix)
  1714  }
  1715  
  1716  // isOldNomadService returns true if the ID matches an old pattern managed by
  1717  // Nomad.
  1718  //
  1719  // Pre-0.7.1 task service IDs are of the form:
  1720  //
  1721  //	{nomadServicePrefix}-executor-{ALLOC_ID}-{Service.Name}-{Service.Tags...}
  1722  //	Example Service ID: _nomad-executor-1234-echo-http-tag1-tag2-tag3
  1723  func isOldNomadService(id string) bool {
  1724  	const prefix = nomadServicePrefix + "-executor"
  1725  	return strings.HasPrefix(id, prefix)
  1726  }
  1727  
  1728  const (
  1729  	sidecarSuffix = "-sidecar-proxy"
  1730  )
  1731  
  1732  // maybeConnectSidecar returns true if the ID is likely of a Connect sidecar proxy.
  1733  // This function should only be used to determine if Nomad should skip managing
  1734  // service id; it could produce false negatives for non-Nomad managed services
  1735  // (i.e. someone set the ID manually), but Nomad does not manage those anyway.
  1736  //
  1737  // It is important not to reference the parent service, which may or may not still
  1738  // be tracked by Nomad internally.
  1739  //
  1740  // For example if you have a Connect enabled service with the ID:
  1741  //
  1742  //	_nomad-task-5229c7f8-376b-3ccc-edd9-981e238f7033-cache-redis-cache-db
  1743  //
  1744  // Consul will create a service for the sidecar proxy with the ID:
  1745  //
  1746  //	_nomad-task-5229c7f8-376b-3ccc-edd9-981e238f7033-cache-redis-cache-db-sidecar-proxy
  1747  func maybeConnectSidecar(id string) bool {
  1748  	return strings.HasSuffix(id, sidecarSuffix)
  1749  }
  1750  
  1751  var (
  1752  	sidecarProxyCheckRe = regexp.MustCompile(`^service:_nomad-.+-sidecar-proxy(:[\d]+)?$`)
  1753  )
  1754  
  1755  // maybeSidecarProxyCheck returns true if the ID likely matches a Nomad generated
  1756  // check ID used in the context of a Nomad managed Connect sidecar proxy. This function
  1757  // should only be used to determine if Nomad should skip managing a check; it can
  1758  // produce false negatives for non-Nomad managed Connect sidecar proxy checks (i.e.
  1759  // someone set the ID manually), but Nomad does not manage those anyway.
  1760  //
  1761  // For example if you have a Connect enabled service with the ID:
  1762  //
  1763  //	_nomad-task-5229c7f8-376b-3ccc-edd9-981e238f7033-cache-redis-cache-db
  1764  //
  1765  // Nomad will create a Connect sidecar proxy of ID:
  1766  //
  1767  // _nomad-task-5229c7f8-376b-3ccc-edd9-981e238f7033-cache-redis-cache-db-sidecar-proxy
  1768  //
  1769  // With default checks like:
  1770  //
  1771  // service:_nomad-task-2f5fb517-57d4-44ee-7780-dc1cb6e103cd-group-api-count-api-9001-sidecar-proxy:1
  1772  // service:_nomad-task-2f5fb517-57d4-44ee-7780-dc1cb6e103cd-group-api-count-api-9001-sidecar-proxy:2
  1773  //
  1774  // Unless sidecar_service.disable_default_tcp_check is set, in which case the
  1775  // default check is:
  1776  //
  1777  // service:_nomad-task-322616db-2680-35d8-0d10-b50a0a0aa4cd-group-api-count-api-9001-sidecar-proxy
  1778  func maybeSidecarProxyCheck(id string) bool {
  1779  	return sidecarProxyCheckRe.MatchString(id)
  1780  }
  1781  
  1782  // getNomadSidecar returns the service registration of the sidecar for the managed
  1783  // service with the specified id.
  1784  //
  1785  // If the managed service of the specified id does not exist, or the service does
  1786  // not have a sidecar proxy, nil is returned.
  1787  func getNomadSidecar(id string, services map[string]*api.AgentService) *api.AgentService {
  1788  	if _, exists := services[id]; !exists {
  1789  		return nil
  1790  	}
  1791  
  1792  	sidecarID := id + sidecarSuffix
  1793  	return services[sidecarID]
  1794  }
  1795  
  1796  func parseAddress(raw string, port int) (api.ServiceAddress, error) {
  1797  	result := api.ServiceAddress{}
  1798  	addr, portStr, err := net.SplitHostPort(raw)
  1799  	// Error message from Go's net/ipsock.go
  1800  	if err != nil {
  1801  		if !strings.Contains(err.Error(), "missing port in address") {
  1802  			return result, fmt.Errorf("error parsing address %q: %v", raw, err)
  1803  		}
  1804  
  1805  		// Use the whole input as the address if there wasn't a port.
  1806  		if ip := net.ParseIP(raw); ip == nil {
  1807  			return result, fmt.Errorf("error parsing address %q: not an IP address", raw)
  1808  		}
  1809  		addr = raw
  1810  	}
  1811  
  1812  	if portStr != "" {
  1813  		port, err = strconv.Atoi(portStr)
  1814  		if err != nil {
  1815  			return result, fmt.Errorf("error parsing port %q: %v", portStr, err)
  1816  		}
  1817  	}
  1818  
  1819  	result.Address = addr
  1820  	result.Port = port
  1821  	return result, nil
  1822  }
  1823  
  1824  // morph the tagged_addresses map into the structure consul api wants
  1825  func parseTaggedAddresses(m map[string]string, port int) (map[string]api.ServiceAddress, error) {
  1826  	result := make(map[string]api.ServiceAddress, len(m))
  1827  	for k, v := range m {
  1828  		sa, err := parseAddress(v, port)
  1829  		if err != nil {
  1830  			return nil, err
  1831  		}
  1832  		result[k] = sa
  1833  	}
  1834  	return result, nil
  1835  }