github.com/ferranbt/nomad@v0.9.3-0.20190607002617-85c449b7667c/command/agent/consul/client.go (about)

     1  package consul
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"net"
     7  	"net/url"
     8  	"reflect"
     9  	"strconv"
    10  	"strings"
    11  	"sync"
    12  	"sync/atomic"
    13  	"time"
    14  
    15  	metrics "github.com/armon/go-metrics"
    16  	log "github.com/hashicorp/go-hclog"
    17  
    18  	"github.com/hashicorp/consul/api"
    19  	"github.com/hashicorp/nomad/helper"
    20  	"github.com/hashicorp/nomad/nomad/structs"
    21  	"github.com/hashicorp/nomad/plugins/drivers"
    22  )
    23  
const (
	// nomadServicePrefix is the prefix that scopes all Nomad registered
	// services (both agent and task entries).
	nomadServicePrefix = "_nomad"

	// nomadTaskPrefix is the prefix that scopes Nomad registered services
	// for tasks. It is derived from nomadServicePrefix.
	nomadTaskPrefix = nomadServicePrefix + "-task-"

	// nomadCheckPrefix is the prefix that scopes Nomad registered checks for
	// services. It is derived from nomadServicePrefix.
	nomadCheckPrefix = nomadServicePrefix + "-check-"

	// defaultRetryInterval is how quickly to retry syncing services and
	// checks to Consul when an error occurs. Retries back off up to
	// defaultMaxRetryInterval.
	defaultRetryInterval = time.Second

	// defaultMaxRetryInterval is the default upper bound on the retry
	// backoff.
	defaultMaxRetryInterval = 30 * time.Second

	// defaultPeriodicInterval is the interval at which the service
	// client reconciles state between the desired services and checks and
	// what's actually registered in Consul. This is done at an interval,
	// rather than being purely edge triggered, to handle the case that the
	// Consul agent's state may change underneath us.
	defaultPeriodicInterval = 30 * time.Second

	// ttlCheckBuffer is the time interval that Nomad can take to report
	// the check result to Consul.
	ttlCheckBuffer = 31 * time.Second

	// defaultShutdownWait is how long Shutdown() should block waiting for
	// enqueued operations to sync to Consul by default.
	defaultShutdownWait = time.Minute

	// DefaultQueryWaitDuration is the max duration the Consul Agent will
	// spend waiting for a response from a Consul Query.
	DefaultQueryWaitDuration = 2 * time.Second

	// ServiceTagHTTP is the tag assigned to HTTP services.
	ServiceTagHTTP = "http"

	// ServiceTagRPC is the tag assigned to RPC services.
	ServiceTagRPC = "rpc"

	// ServiceTagSerf is the tag assigned to Serf services.
	ServiceTagSerf = "serf"
)
    72  
// CatalogAPI declares the consul/api.Catalog methods used by Nomad. Keeping
// it as an interface allows the Consul catalog client to be substituted.
type CatalogAPI interface {
	// Datacenters returns a list of datacenter names or an error.
	Datacenters() ([]string, error)
	// Service looks up catalog entries by service name and tag, honoring the
	// supplied query options.
	Service(service, tag string, q *api.QueryOptions) ([]*api.CatalogService, *api.QueryMeta, error)
}
    78  
// AgentAPI declares the consul/api.Agent methods used by Nomad. The
// ServiceClient in this file talks to the local Consul agent exclusively
// through this interface.
type AgentAPI interface {
	// Services returns the services known to the agent, keyed by string
	// (the sync loop in this file treats the key as the service ID).
	Services() (map[string]*api.AgentService, error)
	// Checks returns the checks known to the agent, keyed by string (the
	// sync loop in this file treats the key as the check ID).
	Checks() (map[string]*api.AgentCheck, error)
	CheckRegister(check *api.AgentCheckRegistration) error
	CheckDeregister(checkID string) error
	Self() (map[string]map[string]interface{}, error)
	ServiceRegister(service *api.AgentServiceRegistration) error
	ServiceDeregister(serviceID string) error
	UpdateTTL(id, output, status string) error
}
    90  
    91  func agentServiceUpdateRequired(reg *api.AgentServiceRegistration, svc *api.AgentService) bool {
    92  	return !(reg.Kind == svc.Kind &&
    93  		reg.ID == svc.ID &&
    94  		reg.Port == svc.Port &&
    95  		reg.Address == svc.Address &&
    96  		reg.Name == svc.Service &&
    97  		reflect.DeepEqual(reg.Tags, svc.Tags))
    98  }
    99  
// operations are submitted to the main loop via commit() for synchronizing
// with Consul. A single value batches registrations and deregistrations;
// merge() folds them into the ServiceClient's desired state.
type operations struct {
	// regServices and regChecks are registrations to add to (or replace in)
	// the desired state, keyed by their ID when merged.
	regServices []*api.AgentServiceRegistration
	regChecks   []*api.AgentCheckRegistration
	// scripts are script checks to track; sync() starts them once the
	// matching check has been registered.
	scripts []*scriptCheck

	// deregServices and deregChecks are IDs to drop from the desired state.
	deregServices []string
	deregChecks   []string
}
   110  
// AllocRegistration holds the status of the services registered for a
// particular allocation, grouped by task.
type AllocRegistration struct {
	// Tasks maps the name of a task to its registered services and checks
	Tasks map[string]*TaskRegistration
}
   117  
   118  func (a *AllocRegistration) copy() *AllocRegistration {
   119  	c := &AllocRegistration{
   120  		Tasks: make(map[string]*TaskRegistration, len(a.Tasks)),
   121  	}
   122  
   123  	for k, v := range a.Tasks {
   124  		c.Tasks[k] = v.copy()
   125  	}
   126  
   127  	return c
   128  }
   129  
   130  // NumServices returns the number of registered services
   131  func (a *AllocRegistration) NumServices() int {
   132  	if a == nil {
   133  		return 0
   134  	}
   135  
   136  	total := 0
   137  	for _, treg := range a.Tasks {
   138  		for _, sreg := range treg.Services {
   139  			if sreg.Service != nil {
   140  				total++
   141  			}
   142  		}
   143  	}
   144  
   145  	return total
   146  }
   147  
   148  // NumChecks returns the number of registered checks
   149  func (a *AllocRegistration) NumChecks() int {
   150  	if a == nil {
   151  		return 0
   152  	}
   153  
   154  	total := 0
   155  	for _, treg := range a.Tasks {
   156  		for _, sreg := range treg.Services {
   157  			total += len(sreg.Checks)
   158  		}
   159  	}
   160  
   161  	return total
   162  }
   163  
// TaskRegistration holds the status of services registered for a particular
// task.
type TaskRegistration struct {
	// Services maps a service ID to the registration of that service and
	// its checks (RegisterTask keys the map by the registration's serviceID).
	Services map[string]*ServiceRegistration
}
   169  
   170  func (t *TaskRegistration) copy() *TaskRegistration {
   171  	c := &TaskRegistration{
   172  		Services: make(map[string]*ServiceRegistration, len(t.Services)),
   173  	}
   174  
   175  	for k, v := range t.Services {
   176  		c.Services[k] = v.copy()
   177  	}
   178  
   179  	return c
   180  }
   181  
// ServiceRegistration holds the status of a registered Consul Service and its
// Checks.
type ServiceRegistration struct {
	// serviceID and checkIDs are internal fields that track just the IDs of the
	// services/checks registered in Consul. They are used to materialize the
	// other fields when queried.
	serviceID string
	checkIDs  map[string]struct{}

	// Service is the AgentService registered in Consul. It is not carried
	// over by copy().
	Service *api.AgentService

	// Checks is the status of the registered checks. It is not carried over
	// by copy().
	Checks []*api.AgentCheck
}
   197  
   198  func (s *ServiceRegistration) copy() *ServiceRegistration {
   199  	// Copy does not copy the external fields but only the internal fields. This
   200  	// is so that the caller of AllocRegistrations can not access the internal
   201  	// fields and that method uses these fields to populate the external fields.
   202  	return &ServiceRegistration{
   203  		serviceID: s.serviceID,
   204  		checkIDs:  helper.CopyMapStringStruct(s.checkIDs),
   205  	}
   206  }
   207  
// ServiceClient handles task and agent service registration with Consul.
// Desired state is accumulated through the operations channel and reconciled
// against the Consul agent by the Run loop.
type ServiceClient struct {
	client AgentAPI
	logger log.Logger
	// retryInterval is the base delay used to back off after a failed sync;
	// maxRetryInterval caps that backoff.
	retryInterval    time.Duration
	maxRetryInterval time.Duration
	// periodicInterval is the delay between syncs after a successful sync.
	periodicInterval time.Duration

	// exitCh is closed when the main Run loop exits
	exitCh chan struct{}

	// shutdownCh is closed when the client should shutdown
	shutdownCh chan struct{}

	// shutdownWait is how long Shutdown() blocks waiting for the final
	// sync() to finish. Defaults to defaultShutdownWait
	shutdownWait time.Duration

	// opCh carries batches of operations from commit() to the Run loop.
	opCh chan *operations

	// services, checks and scripts hold the desired state keyed by ID;
	// runningScripts holds handles for script checks that have been started.
	// They are read and written by merge() and sync().
	services       map[string]*api.AgentServiceRegistration
	checks         map[string]*api.AgentCheckRegistration
	scripts        map[string]*scriptCheck
	runningScripts map[string]*scriptHandle

	// allocRegistrations stores the services and checks that are registered
	// with Consul by allocation ID.
	allocRegistrations     map[string]*AllocRegistration
	allocRegistrationsLock sync.RWMutex

	// agent services and checks record entries for the agent itself which
	// should be removed on shutdown
	agentServices map[string]struct{}
	agentChecks   map[string]struct{}
	agentLock     sync.Mutex

	// seen is 1 if Consul has ever been seen; otherwise 0. Accessed with
	// atomics.
	seen int32

	// checkWatcher restarts checks that are unhealthy.
	checkWatcher *checkWatcher

	// isClientAgent specifies whether this Consul client is being used
	// by a Nomad client.
	isClientAgent bool
}
   255  
   256  // NewServiceClient creates a new Consul ServiceClient from an existing Consul API
   257  // Client, logger and takes whether the client is being used by a Nomad Client agent.
   258  // When being used by a Nomad client, this Consul client reconciles all services and
   259  // checks created by Nomad on behalf of running tasks.
   260  func NewServiceClient(consulClient AgentAPI, logger log.Logger, isNomadClient bool) *ServiceClient {
   261  	logger = logger.ResetNamed("consul.sync")
   262  	return &ServiceClient{
   263  		client:             consulClient,
   264  		logger:             logger,
   265  		retryInterval:      defaultRetryInterval,
   266  		maxRetryInterval:   defaultMaxRetryInterval,
   267  		periodicInterval:   defaultPeriodicInterval,
   268  		exitCh:             make(chan struct{}),
   269  		shutdownCh:         make(chan struct{}),
   270  		shutdownWait:       defaultShutdownWait,
   271  		opCh:               make(chan *operations, 8),
   272  		services:           make(map[string]*api.AgentServiceRegistration),
   273  		checks:             make(map[string]*api.AgentCheckRegistration),
   274  		scripts:            make(map[string]*scriptCheck),
   275  		runningScripts:     make(map[string]*scriptHandle),
   276  		allocRegistrations: make(map[string]*AllocRegistration),
   277  		agentServices:      make(map[string]struct{}),
   278  		agentChecks:        make(map[string]struct{}),
   279  		checkWatcher:       newCheckWatcher(logger, consulClient),
   280  		isClientAgent:      isNomadClient,
   281  	}
   282  }
   283  
// seen is the value markSeen stores into ServiceClient.seen and hasSeen
// compares against.
const seen = 1
   286  
// markSeen marks Consul as having been seen (meaning at least one operation
// has succeeded). The flag is written atomically so it can be read from other
// goroutines via hasSeen.
func (c *ServiceClient) markSeen() {
	atomic.StoreInt32(&c.seen, seen)
}
   292  
// hasSeen returns true if any Consul operation has ever succeeded. Useful to
// squelch errors if Consul isn't running. The flag is read atomically.
func (c *ServiceClient) hasSeen() bool {
	return atomic.LoadInt32(&c.seen) == seen
}
   298  
// Run the Consul main loop which retries operations against Consul. It should
// be called exactly once.
//
// The loop has two phases. First it waits for initial contact with Consul,
// merging any submitted operations without syncing them. Once contact is made
// it starts the check watcher and then syncs whenever the timer fires, new
// operations arrive, or shutdown is requested. exitCh is closed when Run
// returns.
func (c *ServiceClient) Run() {
	defer close(c.exitCh)

	// ctx bounds the lifetime of the goroutines started below; it is
	// cancelled on shutdown and, via the defer, whenever Run returns.
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// init will be closed when Consul has been contacted
	init := make(chan struct{})
	go checkConsulTLSSkipVerify(ctx, c.logger, c.client, init)

	// Process operations while waiting for initial contact with Consul but
	// do not sync until contact has been made.
INIT:
	for {
		select {
		case <-init:
			c.markSeen()
			break INIT
		case <-c.shutdownCh:
			// Shutdown before Consul was ever reached: nothing to sync.
			return
		case ops := <-c.opCh:
			c.merge(ops)
		}
	}
	c.logger.Trace("able to contact Consul")

	// Contact with Consul has been established, so the check watcher can
	// be started.
	go c.checkWatcher.Run(ctx)

	// Always immediately sync to reconcile Nomad and Consul's state
	retryTimer := time.NewTimer(0)

	// failures counts consecutive failed syncs; it drives both the log
	// throttling and the backoff below.
	failures := 0
	for {
		select {
		case <-retryTimer.C:
		case <-c.shutdownCh:
			// Cancel check watcher but sync one last time
			cancel()
		case ops := <-c.opCh:
			c.merge(ops)
		}

		if err := c.sync(); err != nil {
			if failures == 0 {
				// Log on the first failure
				c.logger.Warn("failed to update services in Consul", "error", err)
			} else if failures%10 == 0 {
				// Log every 10th consecutive failure
				c.logger.Error("still unable to update services in Consul", "failures", failures, "error", err)
			}

			failures++
			if !retryTimer.Stop() {
				// Timer already expired, since the timer may
				// or may not have been read in the select{}
				// above, conditionally receive on it
				select {
				case <-retryTimer.C:
				default:
				}
			}
			// Linear backoff: retryInterval times the number of
			// consecutive failures, capped at maxRetryInterval.
			backoff := c.retryInterval * time.Duration(failures)
			if backoff > c.maxRetryInterval {
				backoff = c.maxRetryInterval
			}
			retryTimer.Reset(backoff)
		} else {
			if failures > 0 {
				c.logger.Info("successfully updated services in Consul")
				failures = 0
			}

			// Reset timer to periodic interval to periodically
			// reconcile with Consul. As above, drain the channel if
			// the timer already fired so Reset starts from a clean
			// state.
			if !retryTimer.Stop() {
				select {
				case <-retryTimer.C:
				default:
				}
			}
			retryTimer.Reset(c.periodicInterval)
		}

		// Non-blocking shutdown check: keep looping unless shutdown has
		// been requested.
		select {
		case <-c.shutdownCh:
			// Exit only after sync'ing all outstanding operations
			if len(c.opCh) > 0 {
				for len(c.opCh) > 0 {
					c.merge(<-c.opCh)
				}
				continue
			}
			return
		default:
		}

	}
}
   401  
// commit operations unless already shutting down. The call blocks until ops
// has been handed to the Run loop via opCh or shutdownCh is closed; in the
// latter case ops may be dropped without being synced.
func (c *ServiceClient) commit(ops *operations) {
	select {
	case c.opCh <- ops:
	case <-c.shutdownCh:
	}
}
   409  
   410  // merge registrations into state map prior to sync'ing with Consul
   411  func (c *ServiceClient) merge(ops *operations) {
   412  	for _, s := range ops.regServices {
   413  		c.services[s.ID] = s
   414  	}
   415  	for _, check := range ops.regChecks {
   416  		c.checks[check.ID] = check
   417  	}
   418  	for _, s := range ops.scripts {
   419  		c.scripts[s.id] = s
   420  	}
   421  	for _, sid := range ops.deregServices {
   422  		delete(c.services, sid)
   423  	}
   424  	for _, cid := range ops.deregChecks {
   425  		if script, ok := c.runningScripts[cid]; ok {
   426  			script.cancel()
   427  			delete(c.scripts, cid)
   428  			delete(c.runningScripts, cid)
   429  		}
   430  		delete(c.checks, cid)
   431  	}
   432  	metrics.SetGauge([]string{"client", "consul", "services"}, float32(len(c.services)))
   433  	metrics.SetGauge([]string{"client", "consul", "checks"}, float32(len(c.checks)))
   434  	metrics.SetGauge([]string{"client", "consul", "script_checks"}, float32(len(c.runningScripts)))
   435  }
   436  
// sync reconciles the desired state built up by merge() with what the Consul
// agent currently reports. It runs four passes in order: deregister unknown
// Nomad services, register missing or changed services, deregister unknown
// Nomad checks, and register missing checks (starting script checks as they
// are registered).
//
// The first hard failure aborts the sync, bumps the sync_failure counter and
// is returned; the caller is expected to retry.
func (c *ServiceClient) sync() error {
	// Counters for the summary log line at the end.
	sreg, creg, sdereg, cdereg := 0, 0, 0, 0

	consulServices, err := c.client.Services()
	if err != nil {
		metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
		return fmt.Errorf("error querying Consul services: %v", err)
	}

	consulChecks, err := c.client.Checks()
	if err != nil {
		metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
		return fmt.Errorf("error querying Consul checks: %v", err)
	}

	// Remove Nomad services in Consul but unknown locally
	for id := range consulServices {
		if _, ok := c.services[id]; ok {
			// Known service, skip
			continue
		}

		// Ignore if this is not a Nomad managed service. Also ignore
		// Nomad managed services if this is not a client agent.
		// This is to prevent server agents from removing services
		// registered by client agents
		if !isNomadService(id) || !c.isClientAgent {
			// Not managed by Nomad, skip
			continue
		}

		// Unknown Nomad managed service; kill
		if err := c.client.ServiceDeregister(id); err != nil {
			if isOldNomadService(id) {
				// Don't hard-fail on old entries. See #3620
				continue
			}

			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		sdereg++
		metrics.IncrCounter([]string{"client", "consul", "service_deregistrations"}, 1)
	}

	// Add Nomad services missing from Consul, or where the service has been updated.
	for id, locals := range c.services {
		existingSvc, ok := consulServices[id]

		if ok {
			// There is an existing registration of this service in Consul, so
			// compare it with the desired registration to decide whether it
			// needs to be updated.
			if !agentServiceUpdateRequired(locals, existingSvc) {
				// No need to update services that have not changed
				continue
			}
		}

		if err = c.client.ServiceRegister(locals); err != nil {
			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		sreg++
		metrics.IncrCounter([]string{"client", "consul", "service_registrations"}, 1)
	}

	// Remove Nomad checks in Consul but unknown locally
	for id, check := range consulChecks {
		if _, ok := c.checks[id]; ok {
			// Known check, leave it
			continue
		}

		// Ignore if this is not a Nomad managed check. Also ignore
		// Nomad managed checks if this is not a client agent.
		// This is to prevent server agents from removing checks
		// registered by client agents
		if !isNomadService(check.ServiceID) || !c.isClientAgent || !isNomadCheck(check.CheckID) {
			// Service or check not managed by Nomad, skip
			continue
		}

		// Unknown Nomad managed check; remove
		if err := c.client.CheckDeregister(id); err != nil {
			if isOldNomadService(check.ServiceID) {
				// Don't hard-fail on old entries.
				continue
			}

			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		cdereg++
		metrics.IncrCounter([]string{"client", "consul", "check_deregistrations"}, 1)
	}

	// Add Nomad checks missing from Consul
	for id, check := range c.checks {
		if _, ok := consulChecks[id]; ok {
			// Already in Consul; skipping. Note that this also skips
			// starting any script associated with the check.
			continue
		}

		if err := c.client.CheckRegister(check); err != nil {
			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		creg++
		metrics.IncrCounter([]string{"client", "consul", "check_registrations"}, 1)

		// Handle starting scripts
		if script, ok := c.scripts[id]; ok {
			// If it's already running, cancel and replace
			if oldScript, running := c.runningScripts[id]; running {
				oldScript.cancel()
			}
			// Start and store the handle
			c.runningScripts[id] = script.run()
		}
	}

	// Only log if something was actually synced
	if sreg > 0 || sdereg > 0 || creg > 0 || cdereg > 0 {
		c.logger.Debug("sync complete", "registered_services", sreg, "deregistered_services", sdereg,
			"registered_checks", creg, "deregistered_checks", cdereg)
	}
	return nil
}
   567  
   568  // RegisterAgent registers Nomad agents (client or server). The
   569  // Service.PortLabel should be a literal port to be parsed with SplitHostPort.
   570  // Script checks are not supported and will return an error. Registration is
   571  // asynchronous.
   572  //
   573  // Agents will be deregistered when Shutdown is called.
   574  func (c *ServiceClient) RegisterAgent(role string, services []*structs.Service) error {
   575  	ops := operations{}
   576  
   577  	for _, service := range services {
   578  		id := makeAgentServiceID(role, service)
   579  
   580  		// Unlike tasks, agents don't use port labels. Agent ports are
   581  		// stored directly in the PortLabel.
   582  		host, rawport, err := net.SplitHostPort(service.PortLabel)
   583  		if err != nil {
   584  			return fmt.Errorf("error parsing port label %q from service %q: %v", service.PortLabel, service.Name, err)
   585  		}
   586  		port, err := strconv.Atoi(rawport)
   587  		if err != nil {
   588  			return fmt.Errorf("error parsing port %q from service %q: %v", rawport, service.Name, err)
   589  		}
   590  		serviceReg := &api.AgentServiceRegistration{
   591  			ID:      id,
   592  			Name:    service.Name,
   593  			Tags:    service.Tags,
   594  			Address: host,
   595  			Port:    port,
   596  			// This enables the consul UI to show that Nomad registered this service
   597  			Meta: map[string]string{
   598  				"external-source": "nomad",
   599  			},
   600  		}
   601  		ops.regServices = append(ops.regServices, serviceReg)
   602  
   603  		for _, check := range service.Checks {
   604  			checkID := makeCheckID(id, check)
   605  			if check.Type == structs.ServiceCheckScript {
   606  				return fmt.Errorf("service %q contains invalid check: agent checks do not support scripts", service.Name)
   607  			}
   608  			checkHost, checkPort := serviceReg.Address, serviceReg.Port
   609  			if check.PortLabel != "" {
   610  				// Unlike tasks, agents don't use port labels. Agent ports are
   611  				// stored directly in the PortLabel.
   612  				host, rawport, err := net.SplitHostPort(check.PortLabel)
   613  				if err != nil {
   614  					return fmt.Errorf("error parsing port label %q from check %q: %v", service.PortLabel, check.Name, err)
   615  				}
   616  				port, err := strconv.Atoi(rawport)
   617  				if err != nil {
   618  					return fmt.Errorf("error parsing port %q from check %q: %v", rawport, check.Name, err)
   619  				}
   620  				checkHost, checkPort = host, port
   621  			}
   622  			checkReg, err := createCheckReg(id, checkID, check, checkHost, checkPort)
   623  			if err != nil {
   624  				return fmt.Errorf("failed to add check %q: %v", check.Name, err)
   625  			}
   626  			ops.regChecks = append(ops.regChecks, checkReg)
   627  		}
   628  	}
   629  
   630  	// Don't bother committing agent checks if we're already shutting down
   631  	c.agentLock.Lock()
   632  	defer c.agentLock.Unlock()
   633  	select {
   634  	case <-c.shutdownCh:
   635  		return nil
   636  	default:
   637  	}
   638  
   639  	// Now add them to the registration queue
   640  	c.commit(&ops)
   641  
   642  	// Record IDs for deregistering on shutdown
   643  	for _, id := range ops.regServices {
   644  		c.agentServices[id.ID] = struct{}{}
   645  	}
   646  	for _, id := range ops.regChecks {
   647  		c.agentChecks[id.ID] = struct{}{}
   648  	}
   649  	return nil
   650  }
   651  
   652  // serviceRegs creates service registrations, check registrations, and script
   653  // checks from a service. It returns a service registration object with the
   654  // service and check IDs populated.
   655  func (c *ServiceClient) serviceRegs(ops *operations, service *structs.Service, task *TaskServices) (
   656  	*ServiceRegistration, error) {
   657  
   658  	// Get the services ID
   659  	id := makeTaskServiceID(task.AllocID, task.Name, service, task.Canary)
   660  	sreg := &ServiceRegistration{
   661  		serviceID: id,
   662  		checkIDs:  make(map[string]struct{}, len(service.Checks)),
   663  	}
   664  
   665  	// Service address modes default to auto
   666  	addrMode := service.AddressMode
   667  	if addrMode == "" {
   668  		addrMode = structs.AddressModeAuto
   669  	}
   670  
   671  	// Determine the address to advertise based on the mode
   672  	ip, port, err := getAddress(addrMode, service.PortLabel, task.Networks, task.DriverNetwork)
   673  	if err != nil {
   674  		return nil, fmt.Errorf("unable to get address for service %q: %v", service.Name, err)
   675  	}
   676  
   677  	// Determine whether to use tags or canary_tags
   678  	var tags []string
   679  	if task.Canary && len(service.CanaryTags) > 0 {
   680  		tags = make([]string, len(service.CanaryTags))
   681  		copy(tags, service.CanaryTags)
   682  	} else {
   683  		tags = make([]string, len(service.Tags))
   684  		copy(tags, service.Tags)
   685  	}
   686  
   687  	// Build the Consul Service registration request
   688  	serviceReg := &api.AgentServiceRegistration{
   689  		ID:      id,
   690  		Name:    service.Name,
   691  		Tags:    tags,
   692  		Address: ip,
   693  		Port:    port,
   694  		// This enables the consul UI to show that Nomad registered this service
   695  		Meta: map[string]string{
   696  			"external-source": "nomad",
   697  		},
   698  	}
   699  	ops.regServices = append(ops.regServices, serviceReg)
   700  
   701  	// Build the check registrations
   702  	checkIDs, err := c.checkRegs(ops, id, service, task)
   703  	if err != nil {
   704  		return nil, err
   705  	}
   706  	for _, cid := range checkIDs {
   707  		sreg.checkIDs[cid] = struct{}{}
   708  	}
   709  	return sreg, nil
   710  }
   711  
   712  // checkRegs registers the checks for the given service and returns the
   713  // registered check ids.
   714  func (c *ServiceClient) checkRegs(ops *operations, serviceID string, service *structs.Service,
   715  	task *TaskServices) ([]string, error) {
   716  
   717  	// Fast path
   718  	numChecks := len(service.Checks)
   719  	if numChecks == 0 {
   720  		return nil, nil
   721  	}
   722  
   723  	checkIDs := make([]string, 0, numChecks)
   724  	for _, check := range service.Checks {
   725  		checkID := makeCheckID(serviceID, check)
   726  		checkIDs = append(checkIDs, checkID)
   727  		if check.Type == structs.ServiceCheckScript {
   728  			if task.DriverExec == nil {
   729  				return nil, fmt.Errorf("driver doesn't support script checks")
   730  			}
   731  
   732  			sc := newScriptCheck(task.AllocID, task.Name, checkID, check, task.DriverExec,
   733  				c.client, c.logger, c.shutdownCh)
   734  			ops.scripts = append(ops.scripts, sc)
   735  
   736  			// Skip getAddress for script checks
   737  			checkReg, err := createCheckReg(serviceID, checkID, check, "", 0)
   738  			if err != nil {
   739  				return nil, fmt.Errorf("failed to add script check %q: %v", check.Name, err)
   740  			}
   741  			ops.regChecks = append(ops.regChecks, checkReg)
   742  			continue
   743  		}
   744  
   745  		// Default to the service's port but allow check to override
   746  		portLabel := check.PortLabel
   747  		if portLabel == "" {
   748  			// Default to the service's port label
   749  			portLabel = service.PortLabel
   750  		}
   751  
   752  		// Checks address mode defaults to host for pre-#3380 backward compat
   753  		addrMode := check.AddressMode
   754  		if addrMode == "" {
   755  			addrMode = structs.AddressModeHost
   756  		}
   757  
   758  		ip, port, err := getAddress(addrMode, portLabel, task.Networks, task.DriverNetwork)
   759  		if err != nil {
   760  			return nil, fmt.Errorf("error getting address for check %q: %v", check.Name, err)
   761  		}
   762  
   763  		checkReg, err := createCheckReg(serviceID, checkID, check, ip, port)
   764  		if err != nil {
   765  			return nil, fmt.Errorf("failed to add check %q: %v", check.Name, err)
   766  		}
   767  		ops.regChecks = append(ops.regChecks, checkReg)
   768  	}
   769  	return checkIDs, nil
   770  }
   771  
   772  // RegisterTask with Consul. Adds all service entries and checks to Consul. If
   773  // exec is nil and a script check exists an error is returned.
   774  //
   775  // If the service IP is set it used as the address in the service registration.
   776  // Checks will always use the IP from the Task struct (host's IP).
   777  //
   778  // Actual communication with Consul is done asynchronously (see Run).
   779  func (c *ServiceClient) RegisterTask(task *TaskServices) error {
   780  	// Fast path
   781  	numServices := len(task.Services)
   782  	if numServices == 0 {
   783  		return nil
   784  	}
   785  
   786  	t := new(TaskRegistration)
   787  	t.Services = make(map[string]*ServiceRegistration, numServices)
   788  
   789  	ops := &operations{}
   790  	for _, service := range task.Services {
   791  		sreg, err := c.serviceRegs(ops, service, task)
   792  		if err != nil {
   793  			return err
   794  		}
   795  		t.Services[sreg.serviceID] = sreg
   796  	}
   797  
   798  	// Add the task to the allocation's registration
   799  	c.addTaskRegistration(task.AllocID, task.Name, t)
   800  
   801  	c.commit(ops)
   802  
   803  	// Start watching checks. Done after service registrations are built
   804  	// since an error building them could leak watches.
   805  	for _, service := range task.Services {
   806  		serviceID := makeTaskServiceID(task.AllocID, task.Name, service, task.Canary)
   807  		for _, check := range service.Checks {
   808  			if check.TriggersRestarts() {
   809  				checkID := makeCheckID(serviceID, check)
   810  				c.checkWatcher.Watch(task.AllocID, task.Name, checkID, check, task.Restarter)
   811  			}
   812  		}
   813  	}
   814  	return nil
   815  }
   816  
// UpdateTask in Consul. Does not alter the service if only checks have
// changed.
//
// The services of old and newTask are keyed by their task service IDs and
// compared: services that only exist in old are queued for deregistration,
// services that only exist in newTask (or whose hash changed) are queued for
// registration, and for services present in both the set of checks is
// reconciled. The resulting operations are handed to commit and the task's
// registration is replaced via addTaskRegistration.
//
// An error from checkRegs or serviceRegs is returned immediately; in that
// case nothing is committed and the task registration is left untouched.
// NOTE(review): check watcher changes (Watch/Unwatch) performed before the
// failing call are not rolled back.
//
// DriverNetwork must not change between invocations for the same allocation.
func (c *ServiceClient) UpdateTask(old, newTask *TaskServices) error {
	// Accumulates every registration/deregistration; committed once at the end
	ops := &operations{}

	// Fresh registration record that will replace the task's current one
	taskReg := new(TaskRegistration)
	taskReg.Services = make(map[string]*ServiceRegistration, len(newTask.Services))

	// Index both the old and the new services by their service ID
	existingIDs := make(map[string]*structs.Service, len(old.Services))
	for _, s := range old.Services {
		existingIDs[makeTaskServiceID(old.AllocID, old.Name, s, old.Canary)] = s
	}
	newIDs := make(map[string]*structs.Service, len(newTask.Services))
	for _, s := range newTask.Services {
		newIDs[makeTaskServiceID(newTask.AllocID, newTask.Name, s, newTask.Canary)] = s
	}

	// Loop over existing Service IDs to see if they have been removed
	for existingID, existingSvc := range existingIDs {
		newSvc, ok := newIDs[existingID]

		if !ok {
			// Existing service entry removed
			ops.deregServices = append(ops.deregServices, existingID)
			for _, check := range existingSvc.Checks {
				cid := makeCheckID(existingID, check)
				ops.deregChecks = append(ops.deregChecks, cid)

				// Unwatch watched checks
				if check.TriggersRestarts() {
					c.checkWatcher.Unwatch(cid)
				}
			}
			continue
		}

		// The ID is shared by old and new; the hash decides whether the
		// service definition itself changed.
		oldHash := existingSvc.Hash(old.AllocID, old.Name, old.Canary)
		newHash := newSvc.Hash(newTask.AllocID, newTask.Name, newTask.Canary)
		if oldHash == newHash {
			// Service exists and hasn't changed, don't re-add it later
			delete(newIDs, existingID)
		}
		// When the hashes differ the service stays in newIDs and is registered
		// again by the "remaining services" loop below.

		// Service still exists so add it to the task's registration
		sreg := &ServiceRegistration{
			serviceID: existingID,
			checkIDs:  make(map[string]struct{}, len(newSvc.Checks)),
		}
		taskReg.Services[existingID] = sreg

		// See if any checks were updated
		existingChecks := make(map[string]*structs.ServiceCheck, len(existingSvc.Checks))
		for _, check := range existingSvc.Checks {
			existingChecks[makeCheckID(existingID, check)] = check
		}

		// Register new checks
		for _, check := range newSvc.Checks {
			checkID := makeCheckID(existingID, check)
			if _, exists := existingChecks[checkID]; exists {
				// Check is still required. Remove it from the map so it doesn't get
				// deleted later.
				delete(existingChecks, checkID)
				sreg.checkIDs[checkID] = struct{}{}
			}

			// New check on an unchanged service; add them now
			// NOTE(review): checkRegs receives the whole service and is called
			// once per check, also for checks that already existed; presumably
			// it queues every check of newSvc each time — confirm whether this
			// call should be hoisted out of the loop.
			newCheckIDs, err := c.checkRegs(ops, existingID, newSvc, newTask)
			if err != nil {
				return err
			}

			// This checkID shadows the outer one for the duration of the loop
			for _, checkID := range newCheckIDs {
				sreg.checkIDs[checkID] = struct{}{}
			}

			// Update all watched checks as CheckRestart fields aren't part of ID
			if check.TriggersRestarts() {
				c.checkWatcher.Watch(newTask.AllocID, newTask.Name, checkID, check, newTask.Restarter)
			}
		}

		// Remove existing checks not in updated service
		for cid, check := range existingChecks {
			ops.deregChecks = append(ops.deregChecks, cid)

			// Unwatch checks
			if check.TriggersRestarts() {
				c.checkWatcher.Unwatch(cid)
			}
		}
	}

	// Any remaining services should just be enqueued directly
	for _, newSvc := range newIDs {
		sreg, err := c.serviceRegs(ops, newSvc, newTask)
		if err != nil {
			return err
		}

		// Replaces any entry stored above under the same service ID
		taskReg.Services[sreg.serviceID] = sreg
	}

	// Add the task to the allocation's registration
	c.addTaskRegistration(newTask.AllocID, newTask.Name, taskReg)

	c.commit(ops)

	// Start watching checks. Done after service registrations are built
	// since an error building them could leak watches.
	for _, service := range newIDs {
		serviceID := makeTaskServiceID(newTask.AllocID, newTask.Name, service, newTask.Canary)
		for _, check := range service.Checks {
			if check.TriggersRestarts() {
				checkID := makeCheckID(serviceID, check)
				c.checkWatcher.Watch(newTask.AllocID, newTask.Name, checkID, check, newTask.Restarter)
			}
		}
	}
	return nil
}
   940  
   941  // RemoveTask from Consul. Removes all service entries and checks.
   942  //
   943  // Actual communication with Consul is done asynchronously (see Run).
   944  func (c *ServiceClient) RemoveTask(task *TaskServices) {
   945  	ops := operations{}
   946  
   947  	for _, service := range task.Services {
   948  		id := makeTaskServiceID(task.AllocID, task.Name, service, task.Canary)
   949  		ops.deregServices = append(ops.deregServices, id)
   950  
   951  		for _, check := range service.Checks {
   952  			cid := makeCheckID(id, check)
   953  			ops.deregChecks = append(ops.deregChecks, cid)
   954  
   955  			if check.TriggersRestarts() {
   956  				c.checkWatcher.Unwatch(cid)
   957  			}
   958  		}
   959  	}
   960  
   961  	// Remove the task from the alloc's registrations
   962  	c.removeTaskRegistration(task.AllocID, task.Name)
   963  
   964  	// Now add them to the deregistration fields; main Run loop will update
   965  	c.commit(&ops)
   966  }
   967  
   968  // AllocRegistrations returns the registrations for the given allocation. If the
   969  // allocation has no reservations, the response is a nil object.
   970  func (c *ServiceClient) AllocRegistrations(allocID string) (*AllocRegistration, error) {
   971  	// Get the internal struct using the lock
   972  	c.allocRegistrationsLock.RLock()
   973  	regInternal, ok := c.allocRegistrations[allocID]
   974  	if !ok {
   975  		c.allocRegistrationsLock.RUnlock()
   976  		return nil, nil
   977  	}
   978  
   979  	// Copy so we don't expose internal structs
   980  	reg := regInternal.copy()
   981  	c.allocRegistrationsLock.RUnlock()
   982  
   983  	// Query the services and checks to populate the allocation registrations.
   984  	services, err := c.client.Services()
   985  	if err != nil {
   986  		return nil, err
   987  	}
   988  
   989  	checks, err := c.client.Checks()
   990  	if err != nil {
   991  		return nil, err
   992  	}
   993  
   994  	// Populate the object
   995  	for _, treg := range reg.Tasks {
   996  		for serviceID, sreg := range treg.Services {
   997  			sreg.Service = services[serviceID]
   998  			for checkID := range sreg.checkIDs {
   999  				if check, ok := checks[checkID]; ok {
  1000  					sreg.Checks = append(sreg.Checks, check)
  1001  				}
  1002  			}
  1003  		}
  1004  	}
  1005  
  1006  	return reg, nil
  1007  }
  1008  
  1009  // Shutdown the Consul client. Update running task registrations and deregister
  1010  // agent from Consul. On first call blocks up to shutdownWait before giving up
  1011  // on syncing operations.
  1012  func (c *ServiceClient) Shutdown() error {
  1013  	// Serialize Shutdown calls with RegisterAgent to prevent leaking agent
  1014  	// entries.
  1015  	c.agentLock.Lock()
  1016  	defer c.agentLock.Unlock()
  1017  	select {
  1018  	case <-c.shutdownCh:
  1019  		return nil
  1020  	default:
  1021  		close(c.shutdownCh)
  1022  	}
  1023  
  1024  	// Give run loop time to sync, but don't block indefinitely
  1025  	deadline := time.After(c.shutdownWait)
  1026  
  1027  	// Wait for Run to finish any outstanding operations and exit
  1028  	select {
  1029  	case <-c.exitCh:
  1030  	case <-deadline:
  1031  		// Don't wait forever though
  1032  	}
  1033  
  1034  	// If Consul was never seen nothing could be written so exit early
  1035  	if !c.hasSeen() {
  1036  		return nil
  1037  	}
  1038  
  1039  	// Always attempt to deregister Nomad agent Consul entries, even if
  1040  	// deadline was reached
  1041  	for id := range c.agentServices {
  1042  		if err := c.client.ServiceDeregister(id); err != nil {
  1043  			c.logger.Error("failed deregistering agent service", "service_id", id, "error", err)
  1044  		}
  1045  	}
  1046  	for id := range c.agentChecks {
  1047  		if err := c.client.CheckDeregister(id); err != nil {
  1048  			c.logger.Error("failed deregistering agent check", "check_id", id, "error", err)
  1049  		}
  1050  	}
  1051  
  1052  	// Give script checks time to exit (no need to lock as Run() has exited)
  1053  	for _, h := range c.runningScripts {
  1054  		select {
  1055  		case <-h.wait():
  1056  		case <-deadline:
  1057  			return fmt.Errorf("timed out waiting for script checks to run")
  1058  		}
  1059  	}
  1060  	return nil
  1061  }
  1062  
  1063  // addTaskRegistration adds the task registration for the given allocation.
  1064  func (c *ServiceClient) addTaskRegistration(allocID, taskName string, reg *TaskRegistration) {
  1065  	c.allocRegistrationsLock.Lock()
  1066  	defer c.allocRegistrationsLock.Unlock()
  1067  
  1068  	alloc, ok := c.allocRegistrations[allocID]
  1069  	if !ok {
  1070  		alloc = &AllocRegistration{
  1071  			Tasks: make(map[string]*TaskRegistration),
  1072  		}
  1073  		c.allocRegistrations[allocID] = alloc
  1074  	}
  1075  	alloc.Tasks[taskName] = reg
  1076  }
  1077  
  1078  // removeTaskRegistration removes the task registration for the given allocation.
  1079  func (c *ServiceClient) removeTaskRegistration(allocID, taskName string) {
  1080  	c.allocRegistrationsLock.Lock()
  1081  	defer c.allocRegistrationsLock.Unlock()
  1082  
  1083  	alloc, ok := c.allocRegistrations[allocID]
  1084  	if !ok {
  1085  		return
  1086  	}
  1087  
  1088  	// Delete the task and if it is the last one also delete the alloc's
  1089  	// registration
  1090  	delete(alloc.Tasks, taskName)
  1091  	if len(alloc.Tasks) == 0 {
  1092  		delete(c.allocRegistrations, allocID)
  1093  	}
  1094  }
  1095  
  1096  // makeAgentServiceID creates a unique ID for identifying an agent service in
  1097  // Consul.
  1098  //
  1099  // Agent service IDs are of the form:
  1100  //
  1101  //	{nomadServicePrefix}-{ROLE}-b32(sha1({Service.Name}-{Service.Tags...})
  1102  //	Example Server ID: _nomad-server-fbbk265qn4tmt25nd4ep42tjvmyj3hr4
  1103  //	Example Client ID: _nomad-client-ggnjpgl7yn7rgmvxzilmpvrzzvrszc7l
  1104  //
  1105  func makeAgentServiceID(role string, service *structs.Service) string {
  1106  	return fmt.Sprintf("%s-%s-%s", nomadServicePrefix, role, service.Hash(role, "", false))
  1107  }
  1108  
  1109  // makeTaskServiceID creates a unique ID for identifying a task service in
  1110  // Consul. All structs.Service fields are included in the ID's hash except
  1111  // Checks. This allows updates to merely compare IDs.
  1112  //
  1113  //	Example Service ID: _nomad-task-b4e61df9-b095-d64e-f241-23860da1375f-redis-http
  1114  func makeTaskServiceID(allocID, taskName string, service *structs.Service, canary bool) string {
  1115  	return fmt.Sprintf("%s%s-%s-%s", nomadTaskPrefix, allocID, taskName, service.Name)
  1116  }
  1117  
  1118  // makeCheckID creates a unique ID for a check.
  1119  //
  1120  //  Example Check ID: _nomad-check-434ae42f9a57c5705344974ac38de2aee0ee089d
  1121  func makeCheckID(serviceID string, check *structs.ServiceCheck) string {
  1122  	return fmt.Sprintf("%s%s", nomadCheckPrefix, check.Hash(serviceID))
  1123  }
  1124  
  1125  // createCheckReg creates a Check that can be registered with Consul.
  1126  //
  1127  // Script checks simply have a TTL set and the caller is responsible for
  1128  // running the script and heartbeating.
  1129  func createCheckReg(serviceID, checkID string, check *structs.ServiceCheck, host string, port int) (*api.AgentCheckRegistration, error) {
  1130  	chkReg := api.AgentCheckRegistration{
  1131  		ID:        checkID,
  1132  		Name:      check.Name,
  1133  		ServiceID: serviceID,
  1134  	}
  1135  	chkReg.Status = check.InitialStatus
  1136  	chkReg.Timeout = check.Timeout.String()
  1137  	chkReg.Interval = check.Interval.String()
  1138  
  1139  	// Require an address for http or tcp checks
  1140  	if port == 0 && check.RequiresPort() {
  1141  		return nil, fmt.Errorf("%s checks require an address", check.Type)
  1142  	}
  1143  
  1144  	switch check.Type {
  1145  	case structs.ServiceCheckHTTP:
  1146  		proto := check.Protocol
  1147  		if proto == "" {
  1148  			proto = "http"
  1149  		}
  1150  		if check.TLSSkipVerify {
  1151  			chkReg.TLSSkipVerify = true
  1152  		}
  1153  		base := url.URL{
  1154  			Scheme: proto,
  1155  			Host:   net.JoinHostPort(host, strconv.Itoa(port)),
  1156  		}
  1157  		relative, err := url.Parse(check.Path)
  1158  		if err != nil {
  1159  			return nil, err
  1160  		}
  1161  		url := base.ResolveReference(relative)
  1162  		chkReg.HTTP = url.String()
  1163  		chkReg.Method = check.Method
  1164  		chkReg.Header = check.Header
  1165  
  1166  	case structs.ServiceCheckTCP:
  1167  		chkReg.TCP = net.JoinHostPort(host, strconv.Itoa(port))
  1168  
  1169  	case structs.ServiceCheckScript:
  1170  		chkReg.TTL = (check.Interval + ttlCheckBuffer).String()
  1171  		// As of Consul 1.0.0 setting TTL and Interval is a 400
  1172  		chkReg.Interval = ""
  1173  
  1174  	case structs.ServiceCheckGRPC:
  1175  		chkReg.GRPC = fmt.Sprintf("%s/%s", net.JoinHostPort(host, strconv.Itoa(port)), check.GRPCService)
  1176  		chkReg.GRPCUseTLS = check.GRPCUseTLS
  1177  		if check.TLSSkipVerify {
  1178  			chkReg.TLSSkipVerify = true
  1179  		}
  1180  
  1181  	default:
  1182  		return nil, fmt.Errorf("check type %+q not valid", check.Type)
  1183  	}
  1184  	return &chkReg, nil
  1185  }
  1186  
  1187  // isNomadCheck returns true if the ID matches the pattern of a Nomad managed
  1188  // check.
  1189  func isNomadCheck(id string) bool {
  1190  	return strings.HasPrefix(id, nomadCheckPrefix)
  1191  }
  1192  
  1193  // isNomadService returns true if the ID matches the pattern of a Nomad managed
  1194  // service (new or old formats). Agent services return false as independent
  1195  // client and server agents may be running on the same machine. #2827
  1196  func isNomadService(id string) bool {
  1197  	return strings.HasPrefix(id, nomadTaskPrefix) || isOldNomadService(id)
  1198  }
  1199  
  1200  // isOldNomadService returns true if the ID matches an old pattern managed by
  1201  // Nomad.
  1202  //
  1203  // Pre-0.7.1 task service IDs are of the form:
  1204  //
  1205  //	{nomadServicePrefix}-executor-{ALLOC_ID}-{Service.Name}-{Service.Tags...}
  1206  //	Example Service ID: _nomad-executor-1234-echo-http-tag1-tag2-tag3
  1207  //
  1208  func isOldNomadService(id string) bool {
  1209  	const prefix = nomadServicePrefix + "-executor"
  1210  	return strings.HasPrefix(id, prefix)
  1211  }
  1212  
  1213  // getAddress returns the IP and port to use for a service or check. If no port
  1214  // label is specified (an empty value), zero values are returned because no
  1215  // address could be resolved.
  1216  func getAddress(addrMode, portLabel string, networks structs.Networks, driverNet *drivers.DriverNetwork) (string, int, error) {
  1217  	switch addrMode {
  1218  	case structs.AddressModeAuto:
  1219  		if driverNet.Advertise() {
  1220  			addrMode = structs.AddressModeDriver
  1221  		} else {
  1222  			addrMode = structs.AddressModeHost
  1223  		}
  1224  		return getAddress(addrMode, portLabel, networks, driverNet)
  1225  	case structs.AddressModeHost:
  1226  		if portLabel == "" {
  1227  			if len(networks) != 1 {
  1228  				// If no networks are specified return zero
  1229  				// values. Consul will advertise the host IP
  1230  				// with no port. This is the pre-0.7.1 behavior
  1231  				// some people rely on.
  1232  				return "", 0, nil
  1233  			}
  1234  
  1235  			return networks[0].IP, 0, nil
  1236  		}
  1237  
  1238  		// Default path: use host ip:port
  1239  		ip, port := networks.Port(portLabel)
  1240  		if ip == "" && port <= 0 {
  1241  			return "", 0, fmt.Errorf("invalid port %q: port label not found", portLabel)
  1242  		}
  1243  		return ip, port, nil
  1244  
  1245  	case structs.AddressModeDriver:
  1246  		// Require a driver network if driver address mode is used
  1247  		if driverNet == nil {
  1248  			return "", 0, fmt.Errorf(`cannot use address_mode="driver": no driver network exists`)
  1249  		}
  1250  
  1251  		// If no port label is specified just return the IP
  1252  		if portLabel == "" {
  1253  			return driverNet.IP, 0, nil
  1254  		}
  1255  
  1256  		// If the port is a label, use the driver's port (not the host's)
  1257  		if port, ok := driverNet.PortMap[portLabel]; ok {
  1258  			return driverNet.IP, port, nil
  1259  		}
  1260  
  1261  		// If port isn't a label, try to parse it as a literal port number
  1262  		port, err := strconv.Atoi(portLabel)
  1263  		if err != nil {
  1264  			// Don't include Atoi error message as user likely
  1265  			// never intended it to be a numeric and it creates a
  1266  			// confusing error message
  1267  			return "", 0, fmt.Errorf("invalid port label %q: port labels in driver address_mode must be numeric or in the driver's port map", portLabel)
  1268  		}
  1269  		if port <= 0 {
  1270  			return "", 0, fmt.Errorf("invalid port: %q: port must be >0", portLabel)
  1271  		}
  1272  
  1273  		return driverNet.IP, port, nil
  1274  
  1275  	default:
  1276  		// Shouldn't happen due to validation, but enforce invariants
  1277  		return "", 0, fmt.Errorf("invalid address mode %q", addrMode)
  1278  	}
  1279  }