github.com/hernad/nomad@v1.6.112/command/agent/consul/service_client.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package consul
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"net"
    10  	"net/url"
    11  	"reflect"
    12  	"regexp"
    13  	"strconv"
    14  	"strings"
    15  	"sync"
    16  	"sync/atomic"
    17  	"time"
    18  
    19  	"github.com/armon/go-metrics"
    20  	"github.com/hashicorp/consul/api"
    21  	"github.com/hashicorp/go-hclog"
    22  	"github.com/hashicorp/go-set"
    23  	"github.com/hernad/nomad/client/serviceregistration"
    24  	"github.com/hernad/nomad/helper"
    25  	"github.com/hernad/nomad/helper/envoy"
    26  	"github.com/hernad/nomad/nomad/structs"
    27  	"golang.org/x/exp/maps"
    28  	"golang.org/x/exp/slices"
    29  )
    30  
const (
	// nomadServicePrefix is the prefix that scopes all Nomad registered
	// services (both agent and task entries).
	nomadServicePrefix = "_nomad"

	// nomadServerPrefix is the prefix that scopes Nomad registered Servers.
	nomadServerPrefix = nomadServicePrefix + "-server-"

	// nomadClientPrefix is the prefix that scopes Nomad registered Clients.
	nomadClientPrefix = nomadServicePrefix + "-client-"

	// nomadTaskPrefix is the prefix that scopes Nomad registered services
	// for tasks.
	nomadTaskPrefix = nomadServicePrefix + "-task-"

	// nomadCheckPrefix is the prefix that scopes Nomad registered checks for
	// services.
	nomadCheckPrefix = nomadServicePrefix + "-check-"

	// defaultRetryInterval is how quickly to retry syncing services and
	// checks to Consul when an error occurs. Will backoff up to a max.
	defaultRetryInterval = time.Second

	// defaultMaxRetryInterval is the default max retry interval.
	defaultMaxRetryInterval = 30 * time.Second

	// defaultPeriodicInterval is the interval at which the service
	// client reconciles state between the desired services and checks and
	// what's actually registered in Consul. This is done at an interval,
	// rather than being purely edge triggered, to handle the case that the
	// Consul agent's state may change underneath us.
	defaultPeriodicInterval = 30 * time.Second

	// ttlCheckBuffer is the extra time Nomad is allowed to take when
	// reporting a check result to Consul.
	ttlCheckBuffer = 31 * time.Second

	// defaultShutdownWait is how long Shutdown() should block waiting for
	// enqueued operations to sync to Consul by default.
	defaultShutdownWait = time.Minute

	// DefaultQueryWaitDuration is the max duration the Consul Agent will
	// spend waiting for a response from a Consul Query.
	DefaultQueryWaitDuration = 2 * time.Second

	// ServiceTagHTTP is the tag assigned to HTTP services.
	ServiceTagHTTP = "http"

	// ServiceTagRPC is the tag assigned to RPC services.
	ServiceTagRPC = "rpc"

	// ServiceTagSerf is the tag assigned to Serf services.
	ServiceTagSerf = "serf"

	// deregisterProbationPeriod is the initialization period where
	// services registered in Consul but not in Nomad don't get deregistered,
	// to allow for Nomad restoring tasks.
	deregisterProbationPeriod = time.Minute
)
    90  
    91  // Additional Consul ACLs required
    92  // - Consul Template: key:read
    93  //   Used in tasks with template block that use Consul keys.
    94  
// CatalogAPI is the consul/api.Catalog API used by Nomad.
//
// ACL requirements
// - node:read (listing datacenters)
// - service:read
type CatalogAPI interface {
	// Datacenters is the call that requires node:read (listing datacenters).
	Datacenters() ([]string, error)

	// Service queries the catalog for the given service name and tag.
	// NOTE(review): filtering semantics are defined by consul/api — confirm there.
	Service(service, tag string, q *api.QueryOptions) ([]*api.CatalogService, *api.QueryMeta, error)
}
   104  
// NamespaceAPI is the consul/api.Namespace API used by Nomad.
//
// ACL requirements
// - operator:read OR namespace:*:read
type NamespaceAPI interface {
	// List returns Consul namespaces along with query metadata.
	List(q *api.QueryOptions) ([]*api.Namespace, *api.QueryMeta, error)
}
   112  
// AgentAPI is the consul/api.Agent API used by Nomad.
//
// ACL requirements
// - agent:read
// - service:write
type AgentAPI interface {
	// Check operations. The *Opts variants take query options, which this
	// file uses to select the Consul namespace.
	CheckRegister(check *api.AgentCheckRegistration) error
	CheckDeregisterOpts(checkID string, q *api.QueryOptions) error
	ChecksWithFilterOpts(filter string, q *api.QueryOptions) (map[string]*api.AgentCheck, error)
	UpdateTTLOpts(id, output, status string, q *api.QueryOptions) error

	// Service operations. The maps returned by the *WithFilterOpts calls are
	// keyed by ID in the places this file consumes them.
	ServiceRegister(service *api.AgentServiceRegistration) error
	ServiceDeregisterOpts(serviceID string, q *api.QueryOptions) error
	ServicesWithFilterOpts(filter string, q *api.QueryOptions) (map[string]*api.AgentService, error)

	// Self returns a nested map describing the agent.
	// NOTE(review): the key layout is defined by Consul — confirm against consul/api.
	Self() (map[string]map[string]interface{}, error)
}
   130  
// ConfigAPI is the consul/api.ConfigEntries API subset used by Nomad Server.
//
// ACL requirements
// - operator:write (server only)
type ConfigAPI interface {
	// Set writes a configuration entry.
	// NOTE(review): meaning of the returned bool is defined by consul/api — confirm there.
	Set(entry api.ConfigEntry, w *api.WriteOptions) (bool, *api.WriteMeta, error)
	// Delete(kind, name string, w *api.WriteOptions) (*api.WriteMeta, error) (not used)
}
   139  
// ACLsAPI is the consul/api.ACL API subset used by Nomad Server.
//
// ACL requirements
// - acl:write (server only)
type ACLsAPI interface {
	// Read operations on tokens, policies and roles.
	TokenReadSelf(q *api.QueryOptions) (*api.ACLToken, *api.QueryMeta, error) // for lookup via operator token
	PolicyRead(policyID string, q *api.QueryOptions) (*api.ACLPolicy, *api.QueryMeta, error)
	RoleRead(roleID string, q *api.QueryOptions) (*api.ACLRole, *api.QueryMeta, error)

	// Token lifecycle operations; create and delete take write options.
	TokenCreate(partial *api.ACLToken, q *api.WriteOptions) (*api.ACLToken, *api.WriteMeta, error)
	TokenDelete(accessorID string, q *api.WriteOptions) (*api.WriteMeta, error)
	TokenList(q *api.QueryOptions) ([]*api.ACLTokenListEntry, *api.QueryMeta, error)
}
   152  
   153  // agentServiceUpdateRequired checks if any critical fields in Nomad's version
   154  // of a service definition are different from the existing service definition as
   155  // known by Consul.
   156  //
   157  //	reason - The syncReason that triggered this synchronization with the consul
   158  //	         agent API.
   159  //	wanted - Nomad's view of what the service definition is intended to be.
   160  //	         Not nil.
   161  //	existing - Consul's view (agent, not catalog) of the actual service definition.
   162  //	         Not nil.
   163  //	sidecar - Consul's view (agent, not catalog) of the service definition of the sidecar
   164  //	         associated with existing that may or may not exist.
   165  //	         May be nil.
   166  func (s *ServiceClient) agentServiceUpdateRequired(reason syncReason, wanted *api.AgentServiceRegistration, existing *api.AgentService, sidecar *api.AgentService) bool {
   167  	switch reason {
   168  	case syncPeriodic:
   169  		// In a periodic sync with Consul, we need to respect the value of
   170  		// the enable_tag_override field so that we maintain the illusion that the
   171  		// user is in control of the Consul tags, as they may be externally edited
   172  		// via the Consul catalog API (e.g. a user manually sets them).
   173  		//
   174  		// As Consul does by disabling anti-entropy for the tags field, Nomad will
   175  		// ignore differences in the tags field during the periodic syncs with
   176  		// the Consul agent API.
   177  		//
   178  		// We do so by over-writing the nomad service registration by the value
   179  		// of the tags that Consul contains, if enable_tag_override = true.
   180  		maybeTweakTags(wanted, existing, sidecar)
   181  
   182  		// Also, purge tagged address fields of nomad agent services.
   183  		maybeTweakTaggedAddresses(wanted, existing)
   184  
   185  		// Okay now it is safe to compare.
   186  		return s.different(wanted, existing, sidecar)
   187  
   188  	default:
   189  		// A non-periodic sync with Consul indicates an operation has been set
   190  		// on the queue. This happens when service has been added / removed / modified
   191  		// and implies the Consul agent should be sync'd with nomad, because
   192  		// nomad is the ultimate source of truth for the service definition.
   193  
   194  		// But do purge tagged address fields of nomad agent services.
   195  		maybeTweakTaggedAddresses(wanted, existing)
   196  
   197  		// Okay now it is safe to compare.
   198  		return s.different(wanted, existing, sidecar)
   199  	}
   200  }
   201  
   202  // maybeTweakTags will override wanted.Tags with a copy of existing.Tags only if
   203  // EnableTagOverride is true. Otherwise the wanted service registration is left
   204  // unchanged.
   205  func maybeTweakTags(wanted *api.AgentServiceRegistration, existing *api.AgentService, sidecar *api.AgentService) {
   206  	if wanted.EnableTagOverride {
   207  		wanted.Tags = slices.Clone(existing.Tags)
   208  		// If the service registration also defines a sidecar service, use the ETO
   209  		// setting for the parent service to also apply to the sidecar.
   210  		if wanted.Connect != nil && wanted.Connect.SidecarService != nil {
   211  			if sidecar != nil {
   212  				wanted.Connect.SidecarService.Tags = slices.Clone(sidecar.Tags)
   213  			}
   214  		}
   215  	}
   216  }
   217  
   218  // maybeTweakTaggedAddresses will remove the Consul-injected .TaggedAddresses fields
   219  // from existing if wanted represents a Nomad agent (Client or Server) or Nomad managed
   220  // service, which do not themselves configure those tagged addresses. We do this
   221  // because Consul will magically set the .TaggedAddress to values Nomad does not
   222  // know about if they are submitted as unset.
   223  func maybeTweakTaggedAddresses(wanted *api.AgentServiceRegistration, existing *api.AgentService) {
   224  	if isNomadAgent(wanted.ID) || isNomadService(wanted.ID) {
   225  		if _, exists := wanted.TaggedAddresses["lan_ipv4"]; !exists {
   226  			delete(existing.TaggedAddresses, "lan_ipv4")
   227  		}
   228  		if _, exists := wanted.TaggedAddresses["wan_ipv4"]; !exists {
   229  			delete(existing.TaggedAddresses, "wan_ipv4")
   230  		}
   231  		if _, exists := wanted.TaggedAddresses["lan_ipv6"]; !exists {
   232  			delete(existing.TaggedAddresses, "lan_ipv6")
   233  		}
   234  		if _, exists := wanted.TaggedAddresses["wan_ipv6"]; !exists {
   235  			delete(existing.TaggedAddresses, "wan_ipv6")
   236  		}
   237  	}
   238  }
   239  
   240  // different compares the wanted state of the service registration with the actual
   241  // (cached) state of the service registration reported by Consul. If any of the
   242  // critical fields are not deeply equal, they considered different.
   243  func (s *ServiceClient) different(wanted *api.AgentServiceRegistration, existing *api.AgentService, sidecar *api.AgentService) bool {
   244  	trace := func(field string, left, right any) {
   245  		s.logger.Trace("registrations different", "id", wanted.ID,
   246  			"field", field, "wanted", fmt.Sprintf("%#v", left), "existing", fmt.Sprintf("%#v", right),
   247  		)
   248  	}
   249  
   250  	switch {
   251  	case wanted.Kind != existing.Kind:
   252  		trace("kind", wanted.Kind, existing.Kind)
   253  		return true
   254  	case wanted.ID != existing.ID:
   255  		trace("id", wanted.ID, existing.ID)
   256  		return true
   257  	case wanted.Port != existing.Port:
   258  		trace("port", wanted.Port, existing.Port)
   259  		return true
   260  	case wanted.Address != existing.Address:
   261  		trace("address", wanted.Address, existing.Address)
   262  		return true
   263  	case wanted.Name != existing.Service:
   264  		trace("service name", wanted.Name, existing.Service)
   265  		return true
   266  	case wanted.EnableTagOverride != existing.EnableTagOverride:
   267  		trace("enable_tag_override", wanted.EnableTagOverride, existing.EnableTagOverride)
   268  		return true
   269  	case !maps.Equal(wanted.Meta, existing.Meta):
   270  		trace("meta", wanted.Meta, existing.Meta)
   271  		return true
   272  	case !maps.Equal(wanted.TaggedAddresses, existing.TaggedAddresses):
   273  		trace("tagged_addresses", wanted.TaggedAddresses, existing.TaggedAddresses)
   274  		return true
   275  	case !helper.SliceSetEq(wanted.Tags, existing.Tags):
   276  		trace("tags", wanted.Tags, existing.Tags)
   277  		return true
   278  	case connectSidecarDifferent(wanted, sidecar):
   279  		trace("connect_sidecar", wanted.Name, existing.Service)
   280  		return true
   281  	}
   282  	return false
   283  }
   284  
   285  // sidecarTagsDifferent includes the special logic for comparing sidecar tags
   286  // from Nomad vs. Consul perspective. Because Consul forces the sidecar tags
   287  // to inherit the parent service tags if the sidecar tags are unset, we need to
   288  // take that into consideration when Nomad's sidecar tags are unset by instead
   289  // comparing them to the parent service tags.
   290  func sidecarTagsDifferent(parent, wanted, sidecar []string) bool {
   291  	if len(wanted) == 0 {
   292  		return !helper.SliceSetEq(parent, sidecar)
   293  	}
   294  	return !helper.SliceSetEq(wanted, sidecar)
   295  }
   296  
   297  // proxyUpstreamsDifferent determines if the sidecar_service.proxy.upstreams
   298  // configurations are different between the desired sidecar service state, and
   299  // the actual sidecar service state currently registered in Consul.
   300  func proxyUpstreamsDifferent(wanted *api.AgentServiceConnect, sidecar *api.AgentServiceConnectProxyConfig) bool {
   301  	// There is similar code that already does this in Nomad's API package,
   302  	// however here we are operating on Consul API package structs, and they do not
   303  	// provide such helper functions.
   304  
   305  	getProxyUpstreams := func(pc *api.AgentServiceConnectProxyConfig) []api.Upstream {
   306  		switch {
   307  		case pc == nil:
   308  			return nil
   309  		case len(pc.Upstreams) == 0:
   310  			return nil
   311  		default:
   312  			return pc.Upstreams
   313  		}
   314  	}
   315  
   316  	getConnectUpstreams := func(sc *api.AgentServiceConnect) []api.Upstream {
   317  		switch {
   318  		case sc.SidecarService.Proxy == nil:
   319  			return nil
   320  		case len(sc.SidecarService.Proxy.Upstreams) == 0:
   321  			return nil
   322  		default:
   323  			return sc.SidecarService.Proxy.Upstreams
   324  		}
   325  	}
   326  
   327  	upstreamsDifferent := func(a, b []api.Upstream) bool {
   328  		if len(a) != len(b) {
   329  			return true
   330  		}
   331  
   332  		for i := 0; i < len(a); i++ {
   333  			A := a[i]
   334  			B := b[i]
   335  			switch {
   336  			case A.Datacenter != B.Datacenter:
   337  				return true
   338  			case A.DestinationName != B.DestinationName:
   339  				return true
   340  			case A.LocalBindAddress != B.LocalBindAddress:
   341  				return true
   342  			case A.LocalBindPort != B.LocalBindPort:
   343  				return true
   344  			case A.MeshGateway.Mode != B.MeshGateway.Mode:
   345  				return true
   346  			case !reflect.DeepEqual(A.Config, B.Config):
   347  				return true
   348  			}
   349  		}
   350  		return false
   351  	}
   352  
   353  	return upstreamsDifferent(
   354  		getConnectUpstreams(wanted),
   355  		getProxyUpstreams(sidecar),
   356  	)
   357  }
   358  
   359  // connectSidecarDifferent returns true if Nomad expects there to be a sidecar
   360  // hanging off the desired parent service definition on the Consul side, and does
   361  // not match with what Consul has.
   362  //
   363  // This is used to determine if the connect sidecar service registration should be
   364  // updated - potentially (but not necessarily) in-place.
   365  func connectSidecarDifferent(wanted *api.AgentServiceRegistration, sidecar *api.AgentService) bool {
   366  	if wanted.Connect != nil && wanted.Connect.SidecarService != nil {
   367  		if sidecar == nil {
   368  			// consul lost our sidecar (?)
   369  			return true
   370  		}
   371  
   372  		if sidecarTagsDifferent(wanted.Tags, wanted.Connect.SidecarService.Tags, sidecar.Tags) {
   373  			// tags on the nomad definition have been modified
   374  			return true
   375  		}
   376  
   377  		if proxyUpstreamsDifferent(wanted.Connect, sidecar.Proxy) {
   378  			// proxy upstreams on the nomad definition have been modified
   379  			return true
   380  		}
   381  	}
   382  
   383  	// Either Nomad does not expect there to be a sidecar_service, or there is
   384  	// no actionable difference from the Consul sidecar_service definition.
   385  	return false
   386  }
   387  
// operations are submitted to the main loop via commit() for synchronizing
// with Consul.
type operations struct {
	// regServices are service registrations to record, keyed by their ID when
	// merged into the desired state.
	regServices   []*api.AgentServiceRegistration
	// regChecks are check registrations to record, keyed by their ID when
	// merged into the desired state.
	regChecks     []*api.AgentCheckRegistration
	// deregServices are IDs of services to drop from the desired state.
	deregServices []string
	// deregChecks are IDs of checks to drop from the desired state.
	deregChecks   []string
}
   396  
   397  func (o *operations) empty() bool {
   398  	switch {
   399  	case o == nil:
   400  		return true
   401  	case len(o.regServices) > 0:
   402  		return false
   403  	case len(o.regChecks) > 0:
   404  		return false
   405  	case len(o.deregServices) > 0:
   406  		return false
   407  	case len(o.deregChecks) > 0:
   408  		return false
   409  	default:
   410  		return true
   411  	}
   412  }
   413  
   414  func (o *operations) String() string {
   415  	return fmt.Sprintf("<%d, %d, %d, %d>", len(o.regServices), len(o.regChecks), len(o.deregServices), len(o.deregChecks))
   416  }
   417  
// ServiceClient handles task and agent service registration with Consul.
type ServiceClient struct {
	// agentAPI is the Consul agent API used to query and modify registrations.
	agentAPI         AgentAPI
	// namespacesClient is used to list the Consul namespaces to iterate over.
	namespacesClient *NamespacesClient

	logger           hclog.Logger
	// retryInterval is the base delay before retrying a failed sync; Run
	// multiplies it by the number of consecutive failures.
	retryInterval    time.Duration
	// maxRetryInterval caps the retry backoff.
	maxRetryInterval time.Duration
	// periodicInterval is the delay until the next sync after a successful one.
	periodicInterval time.Duration

	// exitCh is closed when the main Run loop exits
	exitCh chan struct{}

	// shutdownCh is closed when the client should shutdown
	shutdownCh chan struct{}

	// shutdownWait is how long Shutdown() blocks waiting for the final
	// sync() to finish. Defaults to defaultShutdownWait
	shutdownWait time.Duration

	// opCh carries operations from commit() to the Run loop.
	opCh chan *operations

	// services and checks hold the desired registrations keyed by ID; they
	// are updated by merge().
	services map[string]*api.AgentServiceRegistration
	checks   map[string]*api.AgentCheckRegistration

	// explicitlyDeregisteredServices and explicitlyDeregisteredChecks record
	// the IDs deregistered through merged operations; Run resets them after
	// each successful sync.
	explicitlyDeregisteredServices *set.Set[string]
	explicitlyDeregisteredChecks   *set.Set[string]

	// allocRegistrations stores the services and checks that are registered
	// with Consul by allocation ID.
	allocRegistrations     map[string]*serviceregistration.AllocRegistration
	allocRegistrationsLock sync.RWMutex

	// Nomad agent services and checks that are recorded so they can be removed
	// on shutdown. Defers to consul namespace specified in client consul config.
	agentServices *set.Set[string]
	agentChecks   *set.Set[string]
	agentLock     sync.Mutex

	// seen is 1 if Consul has ever been seen; otherwise 0. Accessed with
	// atomics.
	seen int32

	// deregisterProbationExpiry is the time before which consul sync shouldn't deregister
	// unknown services.
	// Used to mitigate risk of deleting restored services upon client restart.
	deregisterProbationExpiry time.Time

	// checkWatcher restarts checks that are unhealthy.
	checkWatcher *serviceregistration.UniversalCheckWatcher

	// isClientAgent specifies whether this Consul client is being used
	// by a Nomad client.
	isClientAgent bool
}
   473  
// checkStatusGetter is the consul-specific implementation of serviceregistration.CheckStatusGetter
type checkStatusGetter struct {
	// agentAPI is queried for the checks in each namespace.
	agentAPI         AgentAPI
	// namespacesClient supplies the list of namespaces to query.
	namespacesClient *NamespacesClient
}
   479  
   480  func (csg *checkStatusGetter) Get() (map[string]string, error) {
   481  	// Get the list of all namespaces so we can iterate them.
   482  	namespaces, err := csg.namespacesClient.List()
   483  	if err != nil {
   484  		return nil, err
   485  	}
   486  
   487  	results := make(map[string]string)
   488  	for _, namespace := range namespaces {
   489  		resultsInNamespace, err := csg.agentAPI.ChecksWithFilterOpts("", &api.QueryOptions{Namespace: normalizeNamespace(namespace)})
   490  		if err != nil {
   491  			return nil, err
   492  		}
   493  
   494  		for k, v := range resultsInNamespace {
   495  			results[k] = v.Status
   496  		}
   497  	}
   498  	return results, nil
   499  }
   500  
   501  // NewServiceClient creates a new Consul ServiceClient from an existing Consul API
   502  // Client, logger and takes whether the client is being used by a Nomad Client agent.
   503  // When being used by a Nomad client, this Consul client reconciles all services and
   504  // checks created by Nomad on behalf of running tasks.
   505  func NewServiceClient(agentAPI AgentAPI, namespacesClient *NamespacesClient, logger hclog.Logger, isNomadClient bool) *ServiceClient {
   506  	logger = logger.ResetNamed("consul.sync")
   507  	return &ServiceClient{
   508  		agentAPI:                       agentAPI,
   509  		namespacesClient:               namespacesClient,
   510  		logger:                         logger,
   511  		retryInterval:                  defaultRetryInterval,
   512  		maxRetryInterval:               defaultMaxRetryInterval,
   513  		periodicInterval:               defaultPeriodicInterval,
   514  		exitCh:                         make(chan struct{}),
   515  		shutdownCh:                     make(chan struct{}),
   516  		shutdownWait:                   defaultShutdownWait,
   517  		opCh:                           make(chan *operations, 8),
   518  		services:                       make(map[string]*api.AgentServiceRegistration),
   519  		checks:                         make(map[string]*api.AgentCheckRegistration),
   520  		explicitlyDeregisteredServices: set.New[string](0),
   521  		explicitlyDeregisteredChecks:   set.New[string](0),
   522  		allocRegistrations:             make(map[string]*serviceregistration.AllocRegistration),
   523  		agentServices:                  set.New[string](4),
   524  		agentChecks:                    set.New[string](0),
   525  		isClientAgent:                  isNomadClient,
   526  		deregisterProbationExpiry:      time.Now().Add(deregisterProbationPeriod),
   527  		checkWatcher: serviceregistration.NewCheckWatcher(logger, &checkStatusGetter{
   528  			agentAPI:         agentAPI,
   529  			namespacesClient: namespacesClient,
   530  		}),
   531  	}
   532  }
   533  
// seen is the value markSeen stores in ServiceClient.seen and hasSeen compares
// against.
const seen = 1
   536  
   537  // markSeen marks Consul as having been seen (meaning at least one operation
   538  // has succeeded).
   539  func (c *ServiceClient) markSeen() {
   540  	atomic.StoreInt32(&c.seen, seen)
   541  }
   542  
   543  // hasSeen returns true if any Consul operation has ever succeeded. Useful to
   544  // squelch errors if Consul isn't running.
   545  func (c *ServiceClient) hasSeen() bool {
   546  	return atomic.LoadInt32(&c.seen) == seen
   547  }
   548  
   549  // syncReason indicates why a sync operation with consul is about to happen.
   550  //
   551  // The trigger for a sync may have implications on the behavior of the sync itself.
   552  // In particular if a service is defined with enable_tag_override=true, the sync
   553  // should ignore changes to the service's Tags field.
   554  type syncReason byte
   555  
   556  const (
   557  	syncPeriodic syncReason = iota
   558  	syncShutdown
   559  	syncNewOps
   560  )
   561  
   562  func (sr syncReason) String() string {
   563  	switch sr {
   564  	case syncPeriodic:
   565  		return "periodic"
   566  	case syncShutdown:
   567  		return "shutdown"
   568  	case syncNewOps:
   569  		return "operations"
   570  	default:
   571  		return "unexpected"
   572  	}
   573  }
   574  
// Run the Consul main loop which retries operations against Consul. It should
// be called exactly once.
//
// The loop has two phases. Until initial contact with Consul is signalled,
// incoming operations are only merged into the desired state. Afterwards every
// timer tick, shutdown signal, or new batch of operations triggers a sync.
// Failed syncs are retried with a linear backoff capped at maxRetryInterval;
// successful syncs schedule the next one after periodicInterval. exitCh is
// closed when Run returns.
func (c *ServiceClient) Run() {
	defer close(c.exitCh)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// init will be closed when Consul has been contacted
	init := make(chan struct{})
	go checkConsulTLSSkipVerify(ctx, c.logger, c.agentAPI, init)

	// Process operations while waiting for initial contact with Consul but
	// do not sync until contact has been made.
INIT:
	for {
		select {
		case <-init:
			c.markSeen()
			break INIT
		case <-c.shutdownCh:
			// Shut down before Consul was ever reached: exit without syncing.
			return
		case ops := <-c.opCh:
			c.merge(ops)
		}
	}
	c.logger.Trace("able to contact Consul")

	// Contact with Consul has been established, so the checkWatcher can be
	// started. It stops when ctx is cancelled.
	go c.checkWatcher.Run(ctx)

	// Always immediately sync to reconcile Nomad and Consul's state
	retryTimer := time.NewTimer(0)

	// failures counts consecutive failed syncs; it drives both the log
	// throttling and the backoff below.
	failures := 0
	for {
		// On every iteration take note of what the trigger for the next sync
		// was, so that it may be referenced during the sync itself.
		var reasonForSync syncReason

		select {
		case <-retryTimer.C:
			reasonForSync = syncPeriodic
		case <-c.shutdownCh:
			reasonForSync = syncShutdown
			// Cancel check watcher but sync one last time
			cancel()
		case ops := <-c.opCh:
			reasonForSync = syncNewOps
			c.merge(ops)
		}

		if err := c.sync(reasonForSync); err != nil {
			if failures == 0 {
				// Log on the first failure
				c.logger.Warn("failed to update services in Consul", "error", err)
			} else if failures%10 == 0 {
				// Log every 10th consecutive failure
				c.logger.Error("still unable to update services in Consul", "failures", failures, "error", err)
			}

			failures++
			if !retryTimer.Stop() {
				// Timer already expired, since the timer may
				// or may not have been read in the select{}
				// above, conditionally receive on it
				select {
				case <-retryTimer.C:
				default:
				}
			}
			// Linear backoff: retryInterval times the number of consecutive
			// failures, capped at maxRetryInterval.
			backoff := c.retryInterval * time.Duration(failures)
			if backoff > c.maxRetryInterval {
				backoff = c.maxRetryInterval
			}
			retryTimer.Reset(backoff)
		} else {
			if failures > 0 {
				c.logger.Info("successfully updated services in Consul")
				failures = 0
			}

			// on successful sync, clear deregistered consul entities
			c.clearExplicitlyDeregistered()

			// Reset timer to periodic interval to periodically
			// reconcile with Consul
			if !retryTimer.Stop() {
				// Same conditional drain as in the failure path above.
				select {
				case <-retryTimer.C:
				default:
				}
			}
			retryTimer.Reset(c.periodicInterval)
		}

		// Non-blocking shutdown check: while running normally, fall through
		// to the next iteration.
		select {
		case <-c.shutdownCh:
			// Exit only after sync'ing all outstanding operations
			if len(c.opCh) > 0 {
				for len(c.opCh) > 0 {
					c.merge(<-c.opCh)
				}
				// Loop again so the operations merged just now get synced.
				continue
			}
			return
		default:
		}

	}
}
   687  
// commit operations unless already shutting down.
//
// Empty (including nil) operations are dropped. Otherwise the send blocks
// until the operations are accepted on opCh or shutdown is signalled,
// whichever comes first.
func (c *ServiceClient) commit(ops *operations) {
	// Logged before the empty check, so ops may still be nil here.
	c.logger.Trace("commit sync operations", "ops", ops)

	// Ignore empty operations - ideally callers will optimize out syncs with
	// nothing to do, but be defensive anyway. Sending an empty ops on the chan
	// will trigger an unnecessary sync with Consul.
	if ops.empty() {
		return
	}

	// Prioritize doing nothing if we are being signaled to shutdown. A
	// separate non-blocking check is needed because a select with several
	// ready cases picks one at random.
	select {
	case <-c.shutdownCh:
		return
	default:
	}

	// Send the ops down the ops chan, triggering a sync with Consul. Unless we
	// receive a signal to shutdown.
	select {
	case c.opCh <- ops:
	case <-c.shutdownCh:
	}
}
   713  
   714  func (c *ServiceClient) clearExplicitlyDeregistered() {
   715  	c.explicitlyDeregisteredServices = set.New[string](0)
   716  	c.explicitlyDeregisteredChecks = set.New[string](0)
   717  }
   718  
   719  // merge registrations into state map prior to sync'ing with Consul
   720  func (c *ServiceClient) merge(ops *operations) {
   721  	for _, s := range ops.regServices {
   722  		c.services[s.ID] = s
   723  	}
   724  	for _, check := range ops.regChecks {
   725  		c.checks[check.ID] = check
   726  	}
   727  	for _, sid := range ops.deregServices {
   728  		delete(c.services, sid)
   729  		c.explicitlyDeregisteredServices.Insert(sid)
   730  	}
   731  	for _, cid := range ops.deregChecks {
   732  		delete(c.checks, cid)
   733  		c.explicitlyDeregisteredChecks.Insert(cid)
   734  	}
   735  	metrics.SetGauge([]string{"client", "consul", "services"}, float32(len(c.services)))
   736  	metrics.SetGauge([]string{"client", "consul", "checks"}, float32(len(c.checks)))
   737  }
   738  
   739  // sync enqueued operations.
   740  func (c *ServiceClient) sync(reason syncReason) error {
   741  	c.logger.Trace("execute sync", "reason", reason)
   742  
   743  	sreg, creg, sdereg, cdereg := 0, 0, 0, 0
   744  	var err error
   745  
   746  	// Get the list of all namespaces created so we can iterate them.
   747  	namespaces, err := c.namespacesClient.List()
   748  	if err != nil {
   749  		metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
   750  		return fmt.Errorf("failed to query Consul namespaces: %w", err)
   751  	}
   752  
   753  	// Accumulate all services in Consul across all namespaces.
   754  	servicesInConsul := make(map[string]*api.AgentService)
   755  	for _, namespace := range namespaces {
   756  		if nsServices, err := c.agentAPI.ServicesWithFilterOpts("", &api.QueryOptions{Namespace: normalizeNamespace(namespace)}); err != nil {
   757  			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
   758  			return fmt.Errorf("failed to query Consul services: %w", err)
   759  		} else {
   760  			for k, v := range nsServices {
   761  				servicesInConsul[k] = v
   762  			}
   763  		}
   764  	}
   765  
   766  	// Compute whether we are still in probation period where we will avoid
   767  	// de-registering services.
   768  	inProbation := time.Now().Before(c.deregisterProbationExpiry)
   769  
   770  	// Remove Nomad services in Consul but unknown to Nomad.
   771  	for id := range servicesInConsul {
   772  		if _, ok := c.services[id]; ok {
   773  			// Known service, skip
   774  			continue
   775  		}
   776  
   777  		// Ignore if this is not a Nomad managed service. Also ignore
   778  		// Nomad managed services if this is not a client agent.
   779  		// This is to prevent server agents from removing services
   780  		// registered by client agents
   781  		if !isNomadService(id) || !c.isClientAgent {
   782  			// Not managed by Nomad, skip
   783  			continue
   784  		}
   785  
   786  		// Ignore unknown services during probation
   787  		if inProbation && !c.explicitlyDeregisteredServices.Contains(id) {
   788  			continue
   789  		}
   790  
   791  		// Ignore if this is a service for a Nomad managed sidecar proxy.
   792  		if maybeConnectSidecar(id) {
   793  			continue
   794  		}
   795  
   796  		// Get the Consul namespace this service is in.
   797  		ns := servicesInConsul[id].Namespace
   798  
   799  		// If this service has a sidecar, we need to remove the sidecar first,
   800  		// otherwise Consul will produce a warning and an error when removing
   801  		// the parent service.
   802  		//
   803  		// The sidecar is not tracked on the Nomad side; it was registered
   804  		// implicitly through the parent service.
   805  		if sidecar := getNomadSidecar(id, servicesInConsul); sidecar != nil {
   806  			if err := c.agentAPI.ServiceDeregisterOpts(sidecar.ID, &api.QueryOptions{Namespace: ns}); err != nil {
   807  				metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
   808  				return err
   809  			}
   810  		}
   811  
   812  		// Remove the unwanted service.
   813  		if err := c.agentAPI.ServiceDeregisterOpts(id, &api.QueryOptions{Namespace: ns}); err != nil {
   814  			if isOldNomadService(id) {
   815  				// Don't hard-fail on old entries. See #3620
   816  				continue
   817  			}
   818  
   819  			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
   820  			return err
   821  		}
   822  		sdereg++
   823  		metrics.IncrCounter([]string{"client", "consul", "service_deregistrations"}, 1)
   824  	}
   825  
   826  	// Add Nomad managed services missing in Consul, or updated via Nomad.
   827  	for id, serviceInNomad := range c.services {
   828  		serviceInConsul, exists := servicesInConsul[id]
   829  		sidecarInConsul := getNomadSidecar(id, servicesInConsul)
   830  
   831  		if !exists || c.agentServiceUpdateRequired(reason, serviceInNomad, serviceInConsul, sidecarInConsul) {
   832  			c.logger.Trace("must register service", "id", id, "exists", exists, "reason", reason)
   833  			if err = c.agentAPI.ServiceRegister(serviceInNomad); err != nil {
   834  				metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
   835  				return err
   836  			}
   837  			sreg++
   838  			metrics.IncrCounter([]string{"client", "consul", "service_registrations"}, 1)
   839  		}
   840  
   841  	}
   842  
   843  	checksInConsul := make(map[string]*api.AgentCheck)
   844  	for _, namespace := range namespaces {
   845  		nsChecks, err := c.agentAPI.ChecksWithFilterOpts("", &api.QueryOptions{Namespace: normalizeNamespace(namespace)})
   846  		if err != nil {
   847  			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
   848  			return fmt.Errorf("failed to query Consul checks: %w", err)
   849  		}
   850  		for k, v := range nsChecks {
   851  			checksInConsul[k] = v
   852  		}
   853  	}
   854  
   855  	// Remove Nomad checks in Consul but unknown locally
   856  	for id, check := range checksInConsul {
   857  		if _, ok := c.checks[id]; ok {
   858  			// Known check, leave it
   859  			continue
   860  		}
   861  
   862  		// Ignore if this is not a Nomad managed check. Also ignore
   863  		// Nomad managed checks if this is not a client agent.
   864  		// This is to prevent server agents from removing checks
   865  		// registered by client agents
   866  		if !isNomadService(check.ServiceID) || !c.isClientAgent || !isNomadCheck(check.CheckID) {
   867  			// Service not managed by Nomad, skip
   868  			continue
   869  		}
   870  
   871  		// Ignore unknown services during probation
   872  		if inProbation && !c.explicitlyDeregisteredChecks.Contains(id) {
   873  			continue
   874  		}
   875  
   876  		// Ignore if this is a check for a Nomad managed sidecar proxy.
   877  		if maybeSidecarProxyCheck(id) {
   878  			continue
   879  		}
   880  
   881  		// Unknown Nomad managed check; remove
   882  		if err := c.agentAPI.CheckDeregisterOpts(id, &api.QueryOptions{Namespace: check.Namespace}); err != nil {
   883  			if isOldNomadService(check.ServiceID) {
   884  				// Don't hard-fail on old entries.
   885  				continue
   886  			}
   887  
   888  			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
   889  			return err
   890  		}
   891  		cdereg++
   892  		metrics.IncrCounter([]string{"client", "consul", "check_deregistrations"}, 1)
   893  	}
   894  
   895  	// Add Nomad checks missing from Consul
   896  	for id, check := range c.checks {
   897  		if _, ok := checksInConsul[id]; ok {
   898  			// Already in Consul; skipping
   899  			continue
   900  		}
   901  		if err := c.agentAPI.CheckRegister(check); err != nil {
   902  			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
   903  			return err
   904  		}
   905  		creg++
   906  		metrics.IncrCounter([]string{"client", "consul", "check_registrations"}, 1)
   907  	}
   908  
   909  	// Only log if something was actually synced
   910  	if sreg > 0 || sdereg > 0 || creg > 0 || cdereg > 0 {
   911  		c.logger.Debug("sync complete", "registered_services", sreg, "deregistered_services", sdereg,
   912  			"registered_checks", creg, "deregistered_checks", cdereg)
   913  	}
   914  	return nil
   915  }
   916  
   917  // RegisterAgent registers Nomad agents (client or server). The
   918  // Service.PortLabel should be a literal port to be parsed with SplitHostPort.
   919  // Script checks are not supported and will return an error. Registration is
   920  // asynchronous.
   921  //
   922  // Agents will be deregistered when Shutdown is called.
   923  //
   924  // Note: no need to manually plumb Consul namespace into the agent service registration
   925  // or its check registrations, because the Nomad Client's Consul Client will already
   926  // have the Nomad Client's Consul Namespace set on startup.
   927  func (c *ServiceClient) RegisterAgent(role string, services []*structs.Service) error {
   928  	ops := operations{}
   929  
   930  	for _, service := range services {
   931  		id := makeAgentServiceID(role, service)
   932  
   933  		// Unlike tasks, agents don't use port labels. Agent ports are
   934  		// stored directly in the PortLabel.
   935  		host, rawport, err := net.SplitHostPort(service.PortLabel)
   936  		if err != nil {
   937  			return fmt.Errorf("error parsing port label %q from service %q: %v", service.PortLabel, service.Name, err)
   938  		}
   939  		port, err := strconv.Atoi(rawport)
   940  		if err != nil {
   941  			return fmt.Errorf("error parsing port %q from service %q: %v", rawport, service.Name, err)
   942  		}
   943  		serviceReg := &api.AgentServiceRegistration{
   944  			ID:      id,
   945  			Name:    service.Name,
   946  			Tags:    service.Tags,
   947  			Address: host,
   948  			Port:    port,
   949  			// This enables the consul UI to show that Nomad registered this service
   950  			Meta: map[string]string{
   951  				"external-source": "nomad",
   952  			},
   953  		}
   954  		ops.regServices = append(ops.regServices, serviceReg)
   955  
   956  		for _, check := range service.Checks {
   957  			checkID := MakeCheckID(id, check)
   958  			if check.Type == structs.ServiceCheckScript {
   959  				return fmt.Errorf("service %q contains invalid check: agent checks do not support scripts", service.Name)
   960  			}
   961  			checkHost, checkPort := serviceReg.Address, serviceReg.Port
   962  			if check.PortLabel != "" {
   963  				// Unlike tasks, agents don't use port labels. Agent ports are
   964  				// stored directly in the PortLabel.
   965  				host, rawport, err := net.SplitHostPort(check.PortLabel)
   966  				if err != nil {
   967  					return fmt.Errorf("error parsing port label %q from check %q: %v", service.PortLabel, check.Name, err)
   968  				}
   969  				port, err := strconv.Atoi(rawport)
   970  				if err != nil {
   971  					return fmt.Errorf("error parsing port %q from check %q: %v", rawport, check.Name, err)
   972  				}
   973  				checkHost, checkPort = host, port
   974  			}
   975  			checkReg, err := createCheckReg(id, checkID, check, checkHost, checkPort, "")
   976  			if err != nil {
   977  				return fmt.Errorf("failed to add check %q: %v", check.Name, err)
   978  			}
   979  			ops.regChecks = append(ops.regChecks, checkReg)
   980  		}
   981  	}
   982  
   983  	// Don't bother committing agent checks if we're already shutting down
   984  	c.agentLock.Lock()
   985  	defer c.agentLock.Unlock()
   986  	select {
   987  	case <-c.shutdownCh:
   988  		return nil
   989  	default:
   990  	}
   991  
   992  	// Now add them to the registration queue
   993  	c.commit(&ops)
   994  
   995  	// Record IDs for deregistering on shutdown
   996  	for _, id := range ops.regServices {
   997  		c.agentServices.Insert(id.ID)
   998  	}
   999  	for _, id := range ops.regChecks {
  1000  		c.agentChecks.Insert(id.ID)
  1001  	}
  1002  	return nil
  1003  }
  1004  
  1005  // serviceRegs creates service registrations, check registrations, and script
  1006  // checks from a service. It returns a service registration object with the
  1007  // service and check IDs populated.
  1008  func (c *ServiceClient) serviceRegs(
  1009  	ops *operations,
  1010  	service *structs.Service,
  1011  	workload *serviceregistration.WorkloadServices,
  1012  ) (*serviceregistration.ServiceRegistration, error) {
  1013  
  1014  	// Get the services ID
  1015  	id := serviceregistration.MakeAllocServiceID(workload.AllocInfo.AllocID, workload.Name(), service)
  1016  	sreg := &serviceregistration.ServiceRegistration{
  1017  		ServiceID:     id,
  1018  		CheckIDs:      make(map[string]struct{}, len(service.Checks)),
  1019  		CheckOnUpdate: make(map[string]string, len(service.Checks)),
  1020  	}
  1021  
  1022  	// Service address modes default to auto
  1023  	addrMode := service.AddressMode
  1024  	if addrMode == "" {
  1025  		addrMode = structs.AddressModeAuto
  1026  	}
  1027  
  1028  	// Determine the address to advertise based on the mode
  1029  	ip, port, err := serviceregistration.GetAddress(
  1030  		service.Address, addrMode, service.PortLabel, workload.Networks, workload.DriverNetwork, workload.Ports, workload.NetworkStatus)
  1031  	if err != nil {
  1032  		return nil, fmt.Errorf("unable to get address for service %q: %v", service.Name, err)
  1033  	}
  1034  
  1035  	// Determine whether to use tags or canary_tags
  1036  	var tags []string
  1037  	if workload.Canary && len(service.CanaryTags) > 0 {
  1038  		tags = make([]string, len(service.CanaryTags))
  1039  		copy(tags, service.CanaryTags)
  1040  	} else {
  1041  		tags = make([]string, len(service.Tags))
  1042  		copy(tags, service.Tags)
  1043  	}
  1044  
  1045  	// newConnect returns (nil, nil) if there's no Connect-enabled service.
  1046  	connect, err := newConnect(id, workload.AllocInfo, service.Name, service.Connect, workload.Networks, workload.Ports)
  1047  	if err != nil {
  1048  		return nil, fmt.Errorf("invalid Consul Connect configuration for service %q: %v", service.Name, err)
  1049  	}
  1050  
  1051  	// newConnectGateway returns nil if there's no Connect gateway.
  1052  	gateway := newConnectGateway(service.Connect)
  1053  
  1054  	// Determine whether to use meta or canary_meta
  1055  	var meta map[string]string
  1056  	if workload.Canary && len(service.CanaryMeta) > 0 {
  1057  		meta = make(map[string]string, len(service.CanaryMeta)+1)
  1058  		for k, v := range service.CanaryMeta {
  1059  			meta[k] = v
  1060  		}
  1061  	} else {
  1062  		meta = make(map[string]string, len(service.Meta)+1)
  1063  		for k, v := range service.Meta {
  1064  			meta[k] = v
  1065  		}
  1066  	}
  1067  
  1068  	// This enables the consul UI to show that Nomad registered this service
  1069  	meta["external-source"] = "nomad"
  1070  
  1071  	// Explicitly set the Consul service Kind in case this service represents
  1072  	// one of the Connect gateway types.
  1073  	kind := api.ServiceKindTypical
  1074  	switch {
  1075  	case service.Connect.IsIngress():
  1076  		kind = api.ServiceKindIngressGateway
  1077  	case service.Connect.IsTerminating():
  1078  		kind = api.ServiceKindTerminatingGateway
  1079  
  1080  		if proxy := service.Connect.Gateway.Proxy; proxy != nil {
  1081  			// set the default port if bridge / default listener set
  1082  			if defaultBind, exists := proxy.EnvoyGatewayBindAddresses["default"]; exists {
  1083  				portLabel := envoy.PortLabel(structs.ConnectTerminatingPrefix, service.Name, "")
  1084  				if dynPort, ok := workload.Ports.Get(portLabel); ok {
  1085  					defaultBind.Port = dynPort.Value
  1086  				}
  1087  			}
  1088  		}
  1089  	case service.Connect.IsMesh():
  1090  		kind = api.ServiceKindMeshGateway
  1091  
  1092  		if proxy := service.Connect.Gateway.Proxy; proxy != nil {
  1093  			// wan uses the service port label, which is typically on a discrete host_network
  1094  			if wanBind, exists := proxy.EnvoyGatewayBindAddresses["wan"]; exists {
  1095  				if wanPort, ok := workload.Ports.Get(service.PortLabel); ok {
  1096  					wanBind.Port = wanPort.Value
  1097  				}
  1098  			}
  1099  			// lan uses a nomad generated dynamic port on the default network
  1100  			if lanBind, exists := proxy.EnvoyGatewayBindAddresses["lan"]; exists {
  1101  				portLabel := envoy.PortLabel(structs.ConnectMeshPrefix, service.Name, "lan")
  1102  				if dynPort, ok := workload.Ports.Get(portLabel); ok {
  1103  					lanBind.Port = dynPort.Value
  1104  				}
  1105  			}
  1106  		}
  1107  	}
  1108  
  1109  	taggedAddresses, err := parseTaggedAddresses(service.TaggedAddresses, port)
  1110  	if err != nil {
  1111  		return nil, err
  1112  	}
  1113  
  1114  	// Build the Consul Service registration request
  1115  	serviceReg := &api.AgentServiceRegistration{
  1116  		Kind:              kind,
  1117  		ID:                id,
  1118  		Name:              service.Name,
  1119  		Namespace:         workload.ProviderNamespace,
  1120  		Tags:              tags,
  1121  		EnableTagOverride: service.EnableTagOverride,
  1122  		Address:           ip,
  1123  		Port:              port,
  1124  		Meta:              meta,
  1125  		TaggedAddresses:   taggedAddresses,
  1126  		Connect:           connect, // will be nil if no Connect block
  1127  		Proxy:             gateway, // will be nil if no Connect Gateway block
  1128  		Checks:            make([]*api.AgentServiceCheck, 0, len(service.Checks)),
  1129  	}
  1130  	ops.regServices = append(ops.regServices, serviceReg)
  1131  
  1132  	// Build the check registrations
  1133  	checkRegs, err := c.checkRegs(id, service, workload, sreg)
  1134  	if err != nil {
  1135  		return nil, err
  1136  	}
  1137  
  1138  	for _, registration := range checkRegs {
  1139  		sreg.CheckIDs[registration.ID] = struct{}{}
  1140  		ops.regChecks = append(ops.regChecks, registration)
  1141  		serviceReg.Checks = append(
  1142  			serviceReg.Checks,
  1143  			apiCheckRegistrationToCheck(registration),
  1144  		)
  1145  	}
  1146  
  1147  	return sreg, nil
  1148  }
  1149  
  1150  // apiCheckRegistrationToCheck converts a check registration to a check, so that
  1151  // we can include them in the initial service registration. It is expected the
  1152  // Nomad-conversion (e.g. turning script checks into ttl checks) has already been
  1153  // applied.
  1154  func apiCheckRegistrationToCheck(r *api.AgentCheckRegistration) *api.AgentServiceCheck {
  1155  	return &api.AgentServiceCheck{
  1156  		CheckID:                r.ID,
  1157  		Name:                   r.Name,
  1158  		Interval:               r.Interval,
  1159  		Timeout:                r.Timeout,
  1160  		TTL:                    r.TTL,
  1161  		HTTP:                   r.HTTP,
  1162  		Header:                 maps.Clone(r.Header),
  1163  		Method:                 r.Method,
  1164  		Body:                   r.Body,
  1165  		TCP:                    r.TCP,
  1166  		Status:                 r.Status,
  1167  		TLSServerName:          r.TLSServerName,
  1168  		TLSSkipVerify:          r.TLSSkipVerify,
  1169  		GRPC:                   r.GRPC,
  1170  		GRPCUseTLS:             r.GRPCUseTLS,
  1171  		SuccessBeforePassing:   r.SuccessBeforePassing,
  1172  		FailuresBeforeCritical: r.FailuresBeforeCritical,
  1173  	}
  1174  }
  1175  
  1176  // checkRegs creates check registrations for the given service
  1177  func (c *ServiceClient) checkRegs(
  1178  	serviceID string,
  1179  	service *structs.Service,
  1180  	workload *serviceregistration.WorkloadServices,
  1181  	sreg *serviceregistration.ServiceRegistration,
  1182  ) ([]*api.AgentCheckRegistration, error) {
  1183  
  1184  	registrations := make([]*api.AgentCheckRegistration, 0, len(service.Checks))
  1185  	for _, check := range service.Checks {
  1186  		var ip string
  1187  		var port int
  1188  
  1189  		if check.Type != structs.ServiceCheckScript {
  1190  			portLabel := check.PortLabel
  1191  			if portLabel == "" {
  1192  				portLabel = service.PortLabel
  1193  			}
  1194  
  1195  			addrMode := check.AddressMode
  1196  			if addrMode == "" {
  1197  				if service.Address != "" {
  1198  					// if the service is using a custom address, enable the check
  1199  					// to use that address
  1200  					addrMode = structs.AddressModeAuto
  1201  				} else {
  1202  					// otherwise default to the host address
  1203  					addrMode = structs.AddressModeHost
  1204  				}
  1205  			}
  1206  
  1207  			var err error
  1208  			ip, port, err = serviceregistration.GetAddress(
  1209  				service.Address, addrMode, portLabel, workload.Networks, workload.DriverNetwork, workload.Ports, workload.NetworkStatus)
  1210  			if err != nil {
  1211  				return nil, fmt.Errorf("error getting address for check %q: %v", check.Name, err)
  1212  			}
  1213  		}
  1214  
  1215  		checkID := MakeCheckID(serviceID, check)
  1216  		registration, err := createCheckReg(serviceID, checkID, check, ip, port, workload.ProviderNamespace)
  1217  		if err != nil {
  1218  			return nil, fmt.Errorf("failed to add check %q: %v", check.Name, err)
  1219  		}
  1220  		sreg.CheckOnUpdate[checkID] = check.OnUpdate
  1221  		registrations = append(registrations, registration)
  1222  	}
  1223  
  1224  	return registrations, nil
  1225  }
  1226  
  1227  // RegisterWorkload with Consul. Adds all service entries and checks to Consul.
  1228  //
  1229  // If the service IP is set it used as the address in the service registration.
  1230  // Checks will always use the IP from the Task struct (host's IP).
  1231  //
  1232  // Actual communication with Consul is done asynchronously (see Run).
  1233  func (c *ServiceClient) RegisterWorkload(workload *serviceregistration.WorkloadServices) error {
  1234  	// Fast path
  1235  	numServices := len(workload.Services)
  1236  	if numServices == 0 {
  1237  		return nil
  1238  	}
  1239  
  1240  	t := new(serviceregistration.ServiceRegistrations)
  1241  	t.Services = make(map[string]*serviceregistration.ServiceRegistration, numServices)
  1242  
  1243  	ops := &operations{}
  1244  	for _, service := range workload.Services {
  1245  		sreg, err := c.serviceRegs(ops, service, workload)
  1246  		if err != nil {
  1247  			return err
  1248  		}
  1249  		t.Services[sreg.ServiceID] = sreg
  1250  	}
  1251  
  1252  	// Add the workload to the allocation's registration
  1253  	c.addRegistrations(workload.AllocInfo.AllocID, workload.Name(), t)
  1254  
  1255  	c.commit(ops)
  1256  
  1257  	// Start watching checks. Done after service registrations are built
  1258  	// since an error building them could leak watches.
  1259  	for _, service := range workload.Services {
  1260  		serviceID := serviceregistration.MakeAllocServiceID(workload.AllocInfo.AllocID, workload.Name(), service)
  1261  		for _, check := range service.Checks {
  1262  			if check.TriggersRestarts() {
  1263  				checkID := MakeCheckID(serviceID, check)
  1264  				c.checkWatcher.Watch(workload.AllocInfo.AllocID, workload.Name(), checkID, check, workload.Restarter)
  1265  			}
  1266  		}
  1267  	}
  1268  	return nil
  1269  }
  1270  
  1271  // UpdateWorkload in Consul. Does not alter the service if only checks have
  1272  // changed.
  1273  //
  1274  // DriverNetwork must not change between invocations for the same allocation.
  1275  func (c *ServiceClient) UpdateWorkload(old, newWorkload *serviceregistration.WorkloadServices) error {
  1276  	ops := new(operations)
  1277  	regs := new(serviceregistration.ServiceRegistrations)
  1278  	regs.Services = make(map[string]*serviceregistration.ServiceRegistration, len(newWorkload.Services))
  1279  
  1280  	newIDs := make(map[string]*structs.Service, len(newWorkload.Services))
  1281  	for _, s := range newWorkload.Services {
  1282  		newIDs[serviceregistration.MakeAllocServiceID(newWorkload.AllocInfo.AllocID, newWorkload.Name(), s)] = s
  1283  	}
  1284  
  1285  	// Loop over existing Services to see if they have been removed
  1286  	for _, existingSvc := range old.Services {
  1287  		existingID := serviceregistration.MakeAllocServiceID(old.AllocInfo.AllocID, old.Name(), existingSvc)
  1288  		newSvc, ok := newIDs[existingID]
  1289  
  1290  		if !ok {
  1291  			// Existing service entry removed
  1292  			ops.deregServices = append(ops.deregServices, existingID)
  1293  			for _, check := range existingSvc.Checks {
  1294  				cid := MakeCheckID(existingID, check)
  1295  				ops.deregChecks = append(ops.deregChecks, cid)
  1296  
  1297  				// Unwatch watched checks
  1298  				if check.TriggersRestarts() {
  1299  					c.checkWatcher.Unwatch(cid)
  1300  				}
  1301  			}
  1302  			continue
  1303  		}
  1304  
  1305  		oldHash := existingSvc.Hash(old.AllocInfo.AllocID, old.Name(), old.Canary)
  1306  		newHash := newSvc.Hash(newWorkload.AllocInfo.AllocID, newWorkload.Name(), newWorkload.Canary)
  1307  		if oldHash == newHash {
  1308  			// Service exists and hasn't changed, don't re-add it later
  1309  			delete(newIDs, existingID)
  1310  		}
  1311  
  1312  		// Service still exists so add it to the task's registration
  1313  		sreg := &serviceregistration.ServiceRegistration{
  1314  			ServiceID:     existingID,
  1315  			CheckIDs:      make(map[string]struct{}, len(newSvc.Checks)),
  1316  			CheckOnUpdate: make(map[string]string, len(newSvc.Checks)),
  1317  		}
  1318  		regs.Services[existingID] = sreg
  1319  
  1320  		// See if any checks were updated
  1321  		existingChecks := make(map[string]*structs.ServiceCheck, len(existingSvc.Checks))
  1322  		for _, check := range existingSvc.Checks {
  1323  			existingChecks[MakeCheckID(existingID, check)] = check
  1324  		}
  1325  
  1326  		// Register new checks
  1327  		for _, check := range newSvc.Checks {
  1328  			checkID := MakeCheckID(existingID, check)
  1329  			if _, exists := existingChecks[checkID]; exists {
  1330  				// Check is still required. Remove it from the map so it doesn't get
  1331  				// deleted later.
  1332  				delete(existingChecks, checkID)
  1333  				sreg.CheckIDs[checkID] = struct{}{}
  1334  				sreg.CheckOnUpdate[checkID] = check.OnUpdate
  1335  			}
  1336  
  1337  			// New check on an unchanged service; add them now
  1338  			checkRegs, err := c.checkRegs(existingID, newSvc, newWorkload, sreg)
  1339  			if err != nil {
  1340  				return err
  1341  			}
  1342  
  1343  			for _, registration := range checkRegs {
  1344  				sreg.CheckIDs[registration.ID] = struct{}{}
  1345  				sreg.CheckOnUpdate[registration.ID] = check.OnUpdate
  1346  				ops.regChecks = append(ops.regChecks, registration)
  1347  			}
  1348  
  1349  			// Update all watched checks as CheckRestart fields aren't part of ID
  1350  			if check.TriggersRestarts() {
  1351  				c.checkWatcher.Watch(newWorkload.AllocInfo.AllocID, newWorkload.Name(), checkID, check, newWorkload.Restarter)
  1352  			}
  1353  		}
  1354  
  1355  		// Remove existing checks not in updated service
  1356  		for cid, check := range existingChecks {
  1357  			ops.deregChecks = append(ops.deregChecks, cid)
  1358  
  1359  			// Unwatch checks
  1360  			if check.TriggersRestarts() {
  1361  				c.checkWatcher.Unwatch(cid)
  1362  			}
  1363  		}
  1364  	}
  1365  
  1366  	// Any remaining services should just be enqueued directly
  1367  	for _, newSvc := range newIDs {
  1368  		sreg, err := c.serviceRegs(ops, newSvc, newWorkload)
  1369  		if err != nil {
  1370  			return err
  1371  		}
  1372  
  1373  		regs.Services[sreg.ServiceID] = sreg
  1374  	}
  1375  
  1376  	// Add the task to the allocation's registration
  1377  	c.addRegistrations(newWorkload.AllocInfo.AllocID, newWorkload.Name(), regs)
  1378  
  1379  	c.commit(ops)
  1380  
  1381  	// Start watching checks. Done after service registrations are built
  1382  	// since an error building them could leak watches.
  1383  	for serviceID, service := range newIDs {
  1384  		for _, check := range service.Checks {
  1385  			if check.TriggersRestarts() {
  1386  				checkID := MakeCheckID(serviceID, check)
  1387  				c.checkWatcher.Watch(newWorkload.AllocInfo.AllocID, newWorkload.Name(), checkID, check, newWorkload.Restarter)
  1388  			}
  1389  		}
  1390  	}
  1391  
  1392  	return nil
  1393  }
  1394  
  1395  // RemoveWorkload from Consul. Removes all service entries and checks.
  1396  //
  1397  // Actual communication with Consul is done asynchronously (see Run).
  1398  func (c *ServiceClient) RemoveWorkload(workload *serviceregistration.WorkloadServices) {
  1399  	ops := operations{}
  1400  
  1401  	for _, service := range workload.Services {
  1402  		id := serviceregistration.MakeAllocServiceID(workload.AllocInfo.AllocID, workload.Name(), service)
  1403  		ops.deregServices = append(ops.deregServices, id)
  1404  
  1405  		for _, check := range service.Checks {
  1406  			cid := MakeCheckID(id, check)
  1407  			ops.deregChecks = append(ops.deregChecks, cid)
  1408  
  1409  			if check.TriggersRestarts() {
  1410  				c.checkWatcher.Unwatch(cid)
  1411  			}
  1412  		}
  1413  	}
  1414  
  1415  	// Remove the workload from the alloc's registrations
  1416  	c.removeRegistration(workload.AllocInfo.AllocID, workload.Name())
  1417  
  1418  	// Now add them to the deregistration fields; main Run loop will update
  1419  	c.commit(&ops)
  1420  }
  1421  
  1422  // normalizeNamespace will turn the "default" namespace into the empty string,
  1423  // so that Consul OSS will not produce an error setting something in the default
  1424  // namespace.
  1425  func normalizeNamespace(namespace string) string {
  1426  	if namespace == "default" {
  1427  		return ""
  1428  	}
  1429  	return namespace
  1430  }
  1431  
  1432  // AllocRegistrations returns the registrations for the given allocation. If the
  1433  // allocation has no registrations, the response is a nil object.
  1434  func (c *ServiceClient) AllocRegistrations(allocID string) (*serviceregistration.AllocRegistration, error) {
  1435  	// Get the internal struct using the lock
  1436  	c.allocRegistrationsLock.RLock()
  1437  	regInternal, ok := c.allocRegistrations[allocID]
  1438  	if !ok {
  1439  		c.allocRegistrationsLock.RUnlock()
  1440  		return nil, nil
  1441  	}
  1442  
  1443  	// Copy so we don't expose internal structs
  1444  	reg := regInternal.Copy()
  1445  	c.allocRegistrationsLock.RUnlock()
  1446  
  1447  	// Get the list of all namespaces created so we can iterate them.
  1448  	namespaces, err := c.namespacesClient.List()
  1449  	if err != nil {
  1450  		return nil, fmt.Errorf("failed to retrieve namespaces from consul: %w", err)
  1451  	}
  1452  
  1453  	services := make(map[string]*api.AgentService)
  1454  	checks := make(map[string]*api.AgentCheck)
  1455  
  1456  	// Query the services and checks to populate the allocation registrations.
  1457  	for _, namespace := range namespaces {
  1458  		nsServices, err := c.agentAPI.ServicesWithFilterOpts("", &api.QueryOptions{Namespace: normalizeNamespace(namespace)})
  1459  		if err != nil {
  1460  			return nil, fmt.Errorf("failed to retrieve services from consul: %w", err)
  1461  		}
  1462  		for k, v := range nsServices {
  1463  			services[k] = v
  1464  		}
  1465  
  1466  		nsChecks, err := c.agentAPI.ChecksWithFilterOpts("", &api.QueryOptions{Namespace: normalizeNamespace(namespace)})
  1467  		if err != nil {
  1468  			return nil, fmt.Errorf("failed to retrieve checks from consul: %w", err)
  1469  		}
  1470  		for k, v := range nsChecks {
  1471  			checks[k] = v
  1472  		}
  1473  	}
  1474  
  1475  	// Populate the object
  1476  	for _, treg := range reg.Tasks {
  1477  		for serviceID, sreg := range treg.Services {
  1478  			sreg.Service = services[serviceID]
  1479  			for checkID := range sreg.CheckIDs {
  1480  				if check, ok := checks[checkID]; ok {
  1481  					sreg.Checks = append(sreg.Checks, check)
  1482  				}
  1483  			}
  1484  		}
  1485  	}
  1486  
  1487  	return reg, nil
  1488  }
  1489  
  1490  // UpdateTTL is used to update the TTL of a check. Typically this will only be
  1491  // called to heartbeat script checks.
  1492  func (c *ServiceClient) UpdateTTL(id, namespace, output, status string) error {
  1493  	ns := normalizeNamespace(namespace)
  1494  	return c.agentAPI.UpdateTTLOpts(id, output, status, &api.QueryOptions{Namespace: ns})
  1495  }
  1496  
  1497  // Shutdown the Consul client. Update running task registrations and deregister
  1498  // agent from Consul. On first call blocks up to shutdownWait before giving up
  1499  // on syncing operations.
  1500  func (c *ServiceClient) Shutdown() error {
  1501  	// Serialize Shutdown calls with RegisterAgent to prevent leaking agent
  1502  	// entries.
  1503  	c.agentLock.Lock()
  1504  	defer c.agentLock.Unlock()
  1505  	select {
  1506  	case <-c.shutdownCh:
  1507  		return nil
  1508  	default:
  1509  		close(c.shutdownCh)
  1510  	}
  1511  
  1512  	// Give run loop time to sync, but don't block indefinitely
  1513  	deadline := time.After(c.shutdownWait)
  1514  
  1515  	// Wait for Run to finish any outstanding operations and exit
  1516  	select {
  1517  	case <-c.exitCh:
  1518  	case <-deadline:
  1519  		// Don't wait forever though
  1520  	}
  1521  
  1522  	// If Consul was never seen nothing could be written so exit early
  1523  	if !c.hasSeen() {
  1524  		return nil
  1525  	}
  1526  
  1527  	// Always attempt to deregister Nomad agent Consul entries, even if
  1528  	// deadline was reached
  1529  	for _, id := range c.agentServices.List() {
  1530  		if err := c.agentAPI.ServiceDeregisterOpts(id, nil); err != nil {
  1531  			c.logger.Error("failed deregistering agent service", "service_id", id, "error", err)
  1532  		}
  1533  	}
  1534  
  1535  	namespaces, err := c.namespacesClient.List()
  1536  	if err != nil {
  1537  		c.logger.Error("failed to retrieve namespaces from consul", "error", err)
  1538  	}
  1539  
  1540  	remainingChecks := make(map[string]*api.AgentCheck)
  1541  	for _, namespace := range namespaces {
  1542  		nsChecks, err := c.agentAPI.ChecksWithFilterOpts("", &api.QueryOptions{Namespace: normalizeNamespace(namespace)})
  1543  		if err != nil {
  1544  			c.logger.Error("failed to retrieve checks from consul", "error", err)
  1545  		}
  1546  		for k, v := range nsChecks {
  1547  			remainingChecks[k] = v
  1548  		}
  1549  	}
  1550  
  1551  	checkRemains := func(id string) bool {
  1552  		for _, c := range remainingChecks {
  1553  			if c.CheckID == id {
  1554  				return true
  1555  			}
  1556  		}
  1557  		return false
  1558  	}
  1559  
  1560  	for _, id := range c.agentChecks.List() {
  1561  		// if we couldn't populate remainingChecks it is unlikely that CheckDeregister will work, but try anyway
  1562  		// if we could list the remaining checks, verify that the check we store still exists before removing it.
  1563  		if remainingChecks == nil || checkRemains(id) {
  1564  			ns := remainingChecks[id].Namespace
  1565  			if err := c.agentAPI.CheckDeregisterOpts(id, &api.QueryOptions{Namespace: ns}); err != nil {
  1566  				c.logger.Error("failed deregistering agent check", "check_id", id, "error", err)
  1567  			}
  1568  		}
  1569  	}
  1570  
  1571  	return nil
  1572  }
  1573  
  1574  // addRegistration adds the service registrations for the given allocation.
  1575  func (c *ServiceClient) addRegistrations(allocID, taskName string, reg *serviceregistration.ServiceRegistrations) {
  1576  	c.allocRegistrationsLock.Lock()
  1577  	defer c.allocRegistrationsLock.Unlock()
  1578  
  1579  	alloc, ok := c.allocRegistrations[allocID]
  1580  	if !ok {
  1581  		alloc = &serviceregistration.AllocRegistration{
  1582  			Tasks: make(map[string]*serviceregistration.ServiceRegistrations),
  1583  		}
  1584  		c.allocRegistrations[allocID] = alloc
  1585  	}
  1586  	alloc.Tasks[taskName] = reg
  1587  }
  1588  
  1589  // removeRegistrations removes the registration for the given allocation.
  1590  func (c *ServiceClient) removeRegistration(allocID, taskName string) {
  1591  	c.allocRegistrationsLock.Lock()
  1592  	defer c.allocRegistrationsLock.Unlock()
  1593  
  1594  	alloc, ok := c.allocRegistrations[allocID]
  1595  	if !ok {
  1596  		return
  1597  	}
  1598  
  1599  	// Delete the task and if it is the last one also delete the alloc's
  1600  	// registration
  1601  	delete(alloc.Tasks, taskName)
  1602  	if len(alloc.Tasks) == 0 {
  1603  		delete(c.allocRegistrations, allocID)
  1604  	}
  1605  }
  1606  
  1607  // makeAgentServiceID creates a unique ID for identifying an agent service in
  1608  // Consul.
  1609  //
  1610  // Agent service IDs are of the form:
  1611  //
  1612  //	{nomadServicePrefix}-{ROLE}-b32(sha1({Service.Name}-{Service.Tags...})
  1613  //	Example Server ID: _nomad-server-fbbk265qn4tmt25nd4ep42tjvmyj3hr4
  1614  //	Example Client ID: _nomad-client-ggnjpgl7yn7rgmvxzilmpvrzzvrszc7l
  1615  func makeAgentServiceID(role string, service *structs.Service) string {
  1616  	return fmt.Sprintf("%s-%s-%s", nomadServicePrefix, role, service.Hash(role, "", false))
  1617  }
  1618  
  1619  // MakeCheckID creates a unique ID for a check.
  1620  //
  1621  //	Example Check ID: _nomad-check-434ae42f9a57c5705344974ac38de2aee0ee089d
  1622  func MakeCheckID(serviceID string, check *structs.ServiceCheck) string {
  1623  	return fmt.Sprintf("%s%s", nomadCheckPrefix, check.Hash(serviceID))
  1624  }
  1625  
  1626  // createCheckReg creates a Check that can be registered with Consul.
  1627  //
  1628  // Script checks simply have a TTL set and the caller is responsible for
  1629  // running the script and heart-beating.
  1630  func createCheckReg(serviceID, checkID string, check *structs.ServiceCheck, host string, port int, namespace string) (*api.AgentCheckRegistration, error) {
  1631  	chkReg := api.AgentCheckRegistration{
  1632  		ID:        checkID,
  1633  		Name:      check.Name,
  1634  		ServiceID: serviceID,
  1635  		Namespace: normalizeNamespace(namespace),
  1636  	}
  1637  	chkReg.Status = check.InitialStatus
  1638  	chkReg.Timeout = check.Timeout.String()
  1639  	chkReg.Interval = check.Interval.String()
  1640  	chkReg.SuccessBeforePassing = check.SuccessBeforePassing
  1641  	chkReg.FailuresBeforeCritical = check.FailuresBeforeCritical
  1642  
  1643  	// Require an address for http or tcp checks
  1644  	if port == 0 && check.RequiresPort() {
  1645  		return nil, fmt.Errorf("%s checks require an address", check.Type)
  1646  	}
  1647  
  1648  	switch check.Type {
  1649  	case structs.ServiceCheckHTTP:
  1650  		proto := check.Protocol
  1651  		if proto == "" {
  1652  			proto = "http"
  1653  		}
  1654  		if check.TLSSkipVerify {
  1655  			chkReg.TLSSkipVerify = true
  1656  		}
  1657  		chkReg.TLSServerName = check.TLSServerName
  1658  		base := url.URL{
  1659  			Scheme: proto,
  1660  			Host:   net.JoinHostPort(host, strconv.Itoa(port)),
  1661  		}
  1662  		relative, err := url.Parse(check.Path)
  1663  		if err != nil {
  1664  			return nil, err
  1665  		}
  1666  		checkURL := base.ResolveReference(relative)
  1667  		chkReg.HTTP = checkURL.String()
  1668  		chkReg.Method = check.Method
  1669  		chkReg.Header = check.Header
  1670  		chkReg.Body = check.Body
  1671  
  1672  	case structs.ServiceCheckTCP:
  1673  		chkReg.TCP = net.JoinHostPort(host, strconv.Itoa(port))
  1674  
  1675  	case structs.ServiceCheckScript:
  1676  		chkReg.TTL = (check.Interval + ttlCheckBuffer).String()
  1677  		// As of Consul 1.0.0 setting TTL and Interval is a 400
  1678  		chkReg.Interval = ""
  1679  
  1680  	case structs.ServiceCheckGRPC:
  1681  		chkReg.GRPC = fmt.Sprintf("%s/%s", net.JoinHostPort(host, strconv.Itoa(port)), check.GRPCService)
  1682  		chkReg.GRPCUseTLS = check.GRPCUseTLS
  1683  		if check.TLSSkipVerify {
  1684  			chkReg.TLSSkipVerify = true
  1685  		}
  1686  		chkReg.TLSServerName = check.TLSServerName
  1687  
  1688  	default:
  1689  		return nil, fmt.Errorf("check type %+q not valid", check.Type)
  1690  	}
  1691  	return &chkReg, nil
  1692  }
  1693  
  1694  // isNomadClient returns true if id represents a Nomad Client registration.
  1695  func isNomadClient(id string) bool {
  1696  	return strings.HasPrefix(id, nomadClientPrefix)
  1697  }
  1698  
  1699  // isNomadServer returns true if id represents a Nomad Server registration.
  1700  func isNomadServer(id string) bool {
  1701  	return strings.HasPrefix(id, nomadServerPrefix)
  1702  }
  1703  
  1704  // isNomadAgent returns true if id represents a Nomad Client or Server registration.
  1705  func isNomadAgent(id string) bool {
  1706  	return isNomadClient(id) || isNomadServer(id)
  1707  }
  1708  
  1709  // isNomadService returns true if the ID matches the pattern of a Nomad managed
  1710  // service (new or old formats). Agent services return false as independent
  1711  // client and server agents may be running on the same machine. #2827
  1712  func isNomadService(id string) bool {
  1713  	return strings.HasPrefix(id, nomadTaskPrefix) || isOldNomadService(id)
  1714  }
  1715  
  1716  // isNomadCheck returns true if the ID matches the pattern of a Nomad managed
  1717  // check.
  1718  func isNomadCheck(id string) bool {
  1719  	return strings.HasPrefix(id, nomadCheckPrefix)
  1720  }
  1721  
  1722  // isOldNomadService returns true if the ID matches an old pattern managed by
  1723  // Nomad.
  1724  //
  1725  // Pre-0.7.1 task service IDs are of the form:
  1726  //
  1727  //	{nomadServicePrefix}-executor-{ALLOC_ID}-{Service.Name}-{Service.Tags...}
  1728  //	Example Service ID: _nomad-executor-1234-echo-http-tag1-tag2-tag3
  1729  func isOldNomadService(id string) bool {
  1730  	const prefix = nomadServicePrefix + "-executor"
  1731  	return strings.HasPrefix(id, prefix)
  1732  }
  1733  
  1734  const (
  1735  	sidecarSuffix = "-sidecar-proxy"
  1736  )
  1737  
  1738  // maybeConnectSidecar returns true if the ID is likely of a Connect sidecar proxy.
  1739  // This function should only be used to determine if Nomad should skip managing
  1740  // service id; it could produce false negatives for non-Nomad managed services
  1741  // (i.e. someone set the ID manually), but Nomad does not manage those anyway.
  1742  //
  1743  // It is important not to reference the parent service, which may or may not still
  1744  // be tracked by Nomad internally.
  1745  //
  1746  // For example if you have a Connect enabled service with the ID:
  1747  //
  1748  //	_nomad-task-5229c7f8-376b-3ccc-edd9-981e238f7033-cache-redis-cache-db
  1749  //
  1750  // Consul will create a service for the sidecar proxy with the ID:
  1751  //
  1752  //	_nomad-task-5229c7f8-376b-3ccc-edd9-981e238f7033-cache-redis-cache-db-sidecar-proxy
  1753  func maybeConnectSidecar(id string) bool {
  1754  	return strings.HasSuffix(id, sidecarSuffix)
  1755  }
  1756  
  1757  var (
  1758  	sidecarProxyCheckRe = regexp.MustCompile(`^service:_nomad-.+-sidecar-proxy(:[\d]+)?$`)
  1759  )
  1760  
  1761  // maybeSidecarProxyCheck returns true if the ID likely matches a Nomad generated
  1762  // check ID used in the context of a Nomad managed Connect sidecar proxy. This function
  1763  // should only be used to determine if Nomad should skip managing a check; it can
  1764  // produce false negatives for non-Nomad managed Connect sidecar proxy checks (i.e.
  1765  // someone set the ID manually), but Nomad does not manage those anyway.
  1766  //
  1767  // For example if you have a Connect enabled service with the ID:
  1768  //
  1769  //	_nomad-task-5229c7f8-376b-3ccc-edd9-981e238f7033-cache-redis-cache-db
  1770  //
  1771  // Nomad will create a Connect sidecar proxy of ID:
  1772  //
  1773  // _nomad-task-5229c7f8-376b-3ccc-edd9-981e238f7033-cache-redis-cache-db-sidecar-proxy
  1774  //
  1775  // With default checks like:
  1776  //
  1777  // service:_nomad-task-2f5fb517-57d4-44ee-7780-dc1cb6e103cd-group-api-count-api-9001-sidecar-proxy:1
  1778  // service:_nomad-task-2f5fb517-57d4-44ee-7780-dc1cb6e103cd-group-api-count-api-9001-sidecar-proxy:2
  1779  //
  1780  // Unless sidecar_service.disable_default_tcp_check is set, in which case the
  1781  // default check is:
  1782  //
  1783  // service:_nomad-task-322616db-2680-35d8-0d10-b50a0a0aa4cd-group-api-count-api-9001-sidecar-proxy
  1784  func maybeSidecarProxyCheck(id string) bool {
  1785  	return sidecarProxyCheckRe.MatchString(id)
  1786  }
  1787  
  1788  // getNomadSidecar returns the service registration of the sidecar for the managed
  1789  // service with the specified id.
  1790  //
  1791  // If the managed service of the specified id does not exist, or the service does
  1792  // not have a sidecar proxy, nil is returned.
  1793  func getNomadSidecar(id string, services map[string]*api.AgentService) *api.AgentService {
  1794  	if _, exists := services[id]; !exists {
  1795  		return nil
  1796  	}
  1797  
  1798  	sidecarID := id + sidecarSuffix
  1799  	return services[sidecarID]
  1800  }
  1801  
  1802  func parseAddress(raw string, port int) (api.ServiceAddress, error) {
  1803  	result := api.ServiceAddress{}
  1804  	addr, portStr, err := net.SplitHostPort(raw)
  1805  	// Error message from Go's net/ipsock.go
  1806  	if err != nil {
  1807  		if !strings.Contains(err.Error(), "missing port in address") {
  1808  			return result, fmt.Errorf("error parsing address %q: %v", raw, err)
  1809  		}
  1810  
  1811  		// Use the whole input as the address if there wasn't a port.
  1812  		if ip := net.ParseIP(raw); ip == nil {
  1813  			return result, fmt.Errorf("error parsing address %q: not an IP address", raw)
  1814  		}
  1815  		addr = raw
  1816  	}
  1817  
  1818  	if portStr != "" {
  1819  		port, err = strconv.Atoi(portStr)
  1820  		if err != nil {
  1821  			return result, fmt.Errorf("error parsing port %q: %v", portStr, err)
  1822  		}
  1823  	}
  1824  
  1825  	result.Address = addr
  1826  	result.Port = port
  1827  	return result, nil
  1828  }
  1829  
  1830  // morph the tagged_addresses map into the structure consul api wants
  1831  func parseTaggedAddresses(m map[string]string, port int) (map[string]api.ServiceAddress, error) {
  1832  	result := make(map[string]api.ServiceAddress, len(m))
  1833  	for k, v := range m {
  1834  		sa, err := parseAddress(v, port)
  1835  		if err != nil {
  1836  			return nil, err
  1837  		}
  1838  		result[k] = sa
  1839  	}
  1840  	return result, nil
  1841  }