// Source: github.com/adityamillind98/nomad@v0.11.8/command/agent/consul/client.go

package consul

import (
	"context"
	"fmt"
	"net"
	"net/url"
	"reflect"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	metrics "github.com/armon/go-metrics"
	log "github.com/hashicorp/go-hclog"

	"github.com/hashicorp/consul/api"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/drivers"
)

const (
	// nomadServicePrefix is the prefix that scopes all Nomad registered
	// services (both agent and task entries).
	nomadServicePrefix = "_nomad"

	// nomadTaskPrefix is the prefix that scopes Nomad registered services
	// for tasks.
	nomadTaskPrefix = nomadServicePrefix + "-task-"

	// nomadCheckPrefix is the prefix that scopes Nomad registered checks for
	// services.
	nomadCheckPrefix = nomadServicePrefix + "-check-"

	// defaultRetryInterval is how quickly to retry syncing services and
	// checks to Consul when an error occurs. The retry delay backs off up
	// to defaultMaxRetryInterval.
	defaultRetryInterval = time.Second

	// defaultMaxRetryInterval is the default max retry interval.
	defaultMaxRetryInterval = 30 * time.Second

	// defaultPeriodicInterval is the interval at which the service
	// client reconciles state between the desired services and checks and
	// what's actually registered in Consul. This is done at an interval,
	// rather than being purely edge triggered, to handle the case that the
	// Consul agent's state may change underneath us.
	defaultPeriodicInterval = 30 * time.Second

	// ttlCheckBuffer is the time interval that Nomad can take to report
	// the check result to Consul.
	ttlCheckBuffer = 31 * time.Second

	// defaultShutdownWait is how long Shutdown() should block waiting for
	// enqueued operations to sync to Consul by default.
	defaultShutdownWait = time.Minute

	// DefaultQueryWaitDuration is the max duration the Consul Agent will
	// spend waiting for a response from a Consul Query.
	DefaultQueryWaitDuration = 2 * time.Second

	// ServiceTagHTTP is the tag assigned to HTTP services
	ServiceTagHTTP = "http"

	// ServiceTagRPC is the tag assigned to RPC services
	ServiceTagRPC = "rpc"

	// ServiceTagSerf is the tag assigned to Serf services
	ServiceTagSerf = "serf"

	// deregisterProbationPeriod is the initialization period where
	// services registered in Consul but not in Nomad don't get deregistered,
	// to allow for nomad restoring tasks
	deregisterProbationPeriod = time.Minute
)

// Additional Consul ACLs required
// - Consul Template: key:read
//   Used in tasks with template stanza that use Consul keys.

// CatalogAPI is the consul/api.Catalog API used by Nomad.
//
// ACL requirements
// - node:read (listing datacenters)
// - service:read
type CatalogAPI interface {
	// Datacenters is the call that needs node:read (see the ACL
	// requirements above).
	Datacenters() ([]string, error)
	// Service is the call that needs service:read; presumably it looks up
	// catalog entries by service name and tag — confirm against consul/api.
	Service(service, tag string, q *api.QueryOptions) ([]*api.CatalogService, *api.QueryMeta, error)
}

// AgentAPI is the consul/api.Agent API used by Nomad.
//
// ACL requirements
// - agent:read
// - service:write
type AgentAPI interface {
	// Services and Checks return the agent's view of what is registered,
	// keyed by ID; sync() compares them against Nomad's desired state.
	Services() (map[string]*api.AgentService, error)
	Checks() (map[string]*api.AgentCheck, error)
	CheckRegister(check *api.AgentCheckRegistration) error
	CheckDeregister(checkID string) error
	Self() (map[string]map[string]interface{}, error)
	ServiceRegister(service *api.AgentServiceRegistration) error
	ServiceDeregister(serviceID string) error
	UpdateTTL(id, output, status string) error
}

// ACLsAPI is the consul/api.ACL API subset used by Nomad Server.
109 // 110 // ACL requirements 111 // - acl:write (server only) 112 type ACLsAPI interface { 113 // We are looking up by [operator token] SecretID, which implies we need 114 // to use this method instead of the normal TokenRead, which can only be 115 // used to lookup tokens by their AccessorID. 116 TokenReadSelf(q *api.QueryOptions) (*api.ACLToken, *api.QueryMeta, error) 117 PolicyRead(policyID string, q *api.QueryOptions) (*api.ACLPolicy, *api.QueryMeta, error) 118 RoleRead(roleID string, q *api.QueryOptions) (*api.ACLRole, *api.QueryMeta, error) 119 TokenCreate(partial *api.ACLToken, q *api.WriteOptions) (*api.ACLToken, *api.WriteMeta, error) 120 TokenDelete(accessorID string, q *api.WriteOptions) (*api.WriteMeta, error) 121 TokenList(q *api.QueryOptions) ([]*api.ACLTokenListEntry, *api.QueryMeta, error) 122 } 123 124 // agentServiceUpdateRequired checks if any critical fields in Nomad's version 125 // of a service definition are different from the existing service definition as 126 // known by Consul. 127 // 128 // reason - The syncReason that triggered this synchronization with the consul 129 // agent API. 130 // wanted - Nomad's view of what the service definition is intended to be. 131 // Not nil. 132 // existing - Consul's view (agent, not catalog) of the actual service definition. 133 // Not nil. 134 // sidecar - Consul's view (agent, not catalog) of the service definition of the sidecar 135 // associated with existing that may or may not exist. 136 // May be nil. 137 func agentServiceUpdateRequired(reason syncReason, wanted *api.AgentServiceRegistration, existing *api.AgentService, sidecar *api.AgentService) bool { 138 switch reason { 139 case syncPeriodic: 140 // In a periodic sync with Consul, we need to respect the value of 141 // the enable_tag_override field so that we maintain the illusion that the 142 // user is in control of the Consul tags, as they may be externally edited 143 // via the Consul catalog API (e.g. a user manually sets them). 
144 // 145 // As Consul does by disabling anti-entropy for the tags field, Nomad will 146 // ignore differences in the tags field during the periodic syncs with 147 // the Consul agent API. 148 // 149 // We do so by over-writing the nomad service registration by the value 150 // of the tags that Consul contains, if enable_tag_override = true. 151 maybeTweakTags(wanted, existing, sidecar) 152 return different(wanted, existing, sidecar) 153 154 default: 155 // A non-periodic sync with Consul indicates an operation has been set 156 // on the queue. This happens when service has been added / removed / modified 157 // and implies the Consul agent should be sync'd with nomad, because 158 // nomad is the ultimate source of truth for the service definition. 159 return different(wanted, existing, sidecar) 160 } 161 } 162 163 // maybeTweakTags will override wanted.Tags with a copy of existing.Tags only if 164 // EnableTagOverride is true. Otherwise the wanted service registration is left 165 // unchanged. 166 func maybeTweakTags(wanted *api.AgentServiceRegistration, existing *api.AgentService, sidecar *api.AgentService) { 167 if wanted.EnableTagOverride { 168 wanted.Tags = helper.CopySliceString(existing.Tags) 169 // If the service registration also defines a sidecar service, use the ETO 170 // setting for the parent service to also apply to the sidecar. 171 if wanted.Connect != nil && wanted.Connect.SidecarService != nil { 172 if sidecar != nil { 173 wanted.Connect.SidecarService.Tags = helper.CopySliceString(sidecar.Tags) 174 } 175 } 176 } 177 } 178 179 // different compares the wanted state of the service registration with the actual 180 // (cached) state of the service registration reported by Consul. If any of the 181 // critical fields are not deeply equal, they considered different. 
182 func different(wanted *api.AgentServiceRegistration, existing *api.AgentService, sidecar *api.AgentService) bool { 183 184 return !(wanted.Kind == existing.Kind && 185 wanted.ID == existing.ID && 186 wanted.Port == existing.Port && 187 wanted.Address == existing.Address && 188 wanted.Name == existing.Service && 189 wanted.EnableTagOverride == existing.EnableTagOverride && 190 reflect.DeepEqual(wanted.Meta, existing.Meta) && 191 reflect.DeepEqual(wanted.Tags, existing.Tags) && 192 !connectSidecarDifferent(wanted, sidecar)) 193 } 194 195 func connectSidecarDifferent(wanted *api.AgentServiceRegistration, sidecar *api.AgentService) bool { 196 if wanted.Connect != nil && wanted.Connect.SidecarService != nil { 197 if sidecar == nil { 198 // consul lost our sidecar (?) 199 return true 200 } 201 if !reflect.DeepEqual(wanted.Connect.SidecarService.Tags, sidecar.Tags) { 202 // tags on the nomad definition have been modified 203 return true 204 } 205 } 206 207 // There is no connect sidecar the nomad side; let consul anti-entropy worry 208 // about any registration on the consul side. 209 return false 210 } 211 212 // operations are submitted to the main loop via commit() for synchronizing 213 // with Consul. 214 type operations struct { 215 regServices []*api.AgentServiceRegistration 216 regChecks []*api.AgentCheckRegistration 217 deregServices []string 218 deregChecks []string 219 } 220 221 // AllocRegistration holds the status of services registered for a particular 222 // allocations by task. 
223 type AllocRegistration struct { 224 // Tasks maps the name of a task to its registered services and checks 225 Tasks map[string]*ServiceRegistrations 226 } 227 228 func (a *AllocRegistration) copy() *AllocRegistration { 229 c := &AllocRegistration{ 230 Tasks: make(map[string]*ServiceRegistrations, len(a.Tasks)), 231 } 232 233 for k, v := range a.Tasks { 234 c.Tasks[k] = v.copy() 235 } 236 237 return c 238 } 239 240 // NumServices returns the number of registered services 241 func (a *AllocRegistration) NumServices() int { 242 if a == nil { 243 return 0 244 } 245 246 total := 0 247 for _, treg := range a.Tasks { 248 for _, sreg := range treg.Services { 249 if sreg.Service != nil { 250 total++ 251 } 252 } 253 } 254 255 return total 256 } 257 258 // NumChecks returns the number of registered checks 259 func (a *AllocRegistration) NumChecks() int { 260 if a == nil { 261 return 0 262 } 263 264 total := 0 265 for _, treg := range a.Tasks { 266 for _, sreg := range treg.Services { 267 total += len(sreg.Checks) 268 } 269 } 270 271 return total 272 } 273 274 // ServiceRegistrations holds the status of services registered for a particular 275 // task or task group. 276 type ServiceRegistrations struct { 277 Services map[string]*ServiceRegistration 278 } 279 280 func (t *ServiceRegistrations) copy() *ServiceRegistrations { 281 c := &ServiceRegistrations{ 282 Services: make(map[string]*ServiceRegistration, len(t.Services)), 283 } 284 285 for k, v := range t.Services { 286 c.Services[k] = v.copy() 287 } 288 289 return c 290 } 291 292 // ServiceRegistration holds the status of a registered Consul Service and its 293 // Checks. 294 type ServiceRegistration struct { 295 // serviceID and checkIDs are internal fields that track just the IDs of the 296 // services/checks registered in Consul. It is used to materialize the other 297 // fields when queried. 298 serviceID string 299 checkIDs map[string]struct{} 300 301 // Service is the AgentService registered in Consul. 
302 Service *api.AgentService 303 304 // Checks is the status of the registered checks. 305 Checks []*api.AgentCheck 306 } 307 308 func (s *ServiceRegistration) copy() *ServiceRegistration { 309 // Copy does not copy the external fields but only the internal fields. This 310 // is so that the caller of AllocRegistrations can not access the internal 311 // fields and that method uses these fields to populate the external fields. 312 return &ServiceRegistration{ 313 serviceID: s.serviceID, 314 checkIDs: helper.CopyMapStringStruct(s.checkIDs), 315 } 316 } 317 318 // ServiceClient handles task and agent service registration with Consul. 319 type ServiceClient struct { 320 client AgentAPI 321 logger log.Logger 322 retryInterval time.Duration 323 maxRetryInterval time.Duration 324 periodicInterval time.Duration 325 326 // exitCh is closed when the main Run loop exits 327 exitCh chan struct{} 328 329 // shutdownCh is closed when the client should shutdown 330 shutdownCh chan struct{} 331 332 // shutdownWait is how long Shutdown() blocks waiting for the final 333 // sync() to finish. Defaults to defaultShutdownWait 334 shutdownWait time.Duration 335 336 opCh chan *operations 337 338 services map[string]*api.AgentServiceRegistration 339 checks map[string]*api.AgentCheckRegistration 340 341 explicitlyDeregisteredServices map[string]bool 342 explicitlyDeregisteredChecks map[string]bool 343 344 // allocRegistrations stores the services and checks that are registered 345 // with Consul by allocation ID. 346 allocRegistrations map[string]*AllocRegistration 347 allocRegistrationsLock sync.RWMutex 348 349 // agent services and checks record entries for the agent itself which 350 // should be removed on shutdown 351 agentServices map[string]struct{} 352 agentChecks map[string]struct{} 353 agentLock sync.Mutex 354 355 // seen is 1 if Consul has ever been seen; otherwise 0. Accessed with 356 // atomics. 
357 seen int32 358 359 // deregisterProbationExpiry is the time before which consul sync shouldn't deregister 360 // unknown services. 361 // Used to mitigate risk of deleting restored services upon client restart. 362 deregisterProbationExpiry time.Time 363 364 // checkWatcher restarts checks that are unhealthy. 365 checkWatcher *checkWatcher 366 367 // isClientAgent specifies whether this Consul client is being used 368 // by a Nomad client. 369 isClientAgent bool 370 } 371 372 // NewServiceClient creates a new Consul ServiceClient from an existing Consul API 373 // Client, logger and takes whether the client is being used by a Nomad Client agent. 374 // When being used by a Nomad client, this Consul client reconciles all services and 375 // checks created by Nomad on behalf of running tasks. 376 func NewServiceClient(consulClient AgentAPI, logger log.Logger, isNomadClient bool) *ServiceClient { 377 logger = logger.ResetNamed("consul.sync") 378 return &ServiceClient{ 379 client: consulClient, 380 logger: logger, 381 retryInterval: defaultRetryInterval, 382 maxRetryInterval: defaultMaxRetryInterval, 383 periodicInterval: defaultPeriodicInterval, 384 exitCh: make(chan struct{}), 385 shutdownCh: make(chan struct{}), 386 shutdownWait: defaultShutdownWait, 387 opCh: make(chan *operations, 8), 388 services: make(map[string]*api.AgentServiceRegistration), 389 checks: make(map[string]*api.AgentCheckRegistration), 390 explicitlyDeregisteredServices: make(map[string]bool), 391 explicitlyDeregisteredChecks: make(map[string]bool), 392 allocRegistrations: make(map[string]*AllocRegistration), 393 agentServices: make(map[string]struct{}), 394 agentChecks: make(map[string]struct{}), 395 checkWatcher: newCheckWatcher(logger, consulClient), 396 isClientAgent: isNomadClient, 397 deregisterProbationExpiry: time.Now().Add(deregisterProbationPeriod), 398 } 399 } 400 401 // seen is used by markSeen and hasSeen 402 const seen = 1 403 404 // markSeen marks Consul as having been seen 
(meaning at least one operation 405 // has succeeded). 406 func (c *ServiceClient) markSeen() { 407 atomic.StoreInt32(&c.seen, seen) 408 } 409 410 // hasSeen returns true if any Consul operation has ever succeeded. Useful to 411 // squelch errors if Consul isn't running. 412 func (c *ServiceClient) hasSeen() bool { 413 return atomic.LoadInt32(&c.seen) == seen 414 } 415 416 // syncReason indicates why a sync operation with consul is about to happen. 417 // 418 // The trigger for a sync may have implications on the behavior of the sync itself. 419 // In particular if a service is defined with enable_tag_override=true, the sync 420 // should ignore changes to the service's Tags field. 421 type syncReason byte 422 423 const ( 424 syncPeriodic = iota 425 syncShutdown 426 syncNewOps 427 ) 428 429 // Run the Consul main loop which retries operations against Consul. It should 430 // be called exactly once. 431 func (c *ServiceClient) Run() { 432 defer close(c.exitCh) 433 434 ctx, cancel := context.WithCancel(context.Background()) 435 defer cancel() 436 437 // init will be closed when Consul has been contacted 438 init := make(chan struct{}) 439 go checkConsulTLSSkipVerify(ctx, c.logger, c.client, init) 440 441 // Process operations while waiting for initial contact with Consul but 442 // do not sync until contact has been made. 443 INIT: 444 for { 445 select { 446 case <-init: 447 c.markSeen() 448 break INIT 449 case <-c.shutdownCh: 450 return 451 case ops := <-c.opCh: 452 c.merge(ops) 453 } 454 } 455 c.logger.Trace("able to contact Consul") 456 457 // Block until contact with Consul has been established 458 // Start checkWatcher 459 go c.checkWatcher.Run(ctx) 460 461 // Always immediately sync to reconcile Nomad and Consul's state 462 retryTimer := time.NewTimer(0) 463 464 failures := 0 465 for { 466 // On every iteration take note of what the trigger for the next sync 467 // was, so that it may be referenced during the sync itself. 
468 var reasonForSync syncReason 469 470 select { 471 case <-retryTimer.C: 472 reasonForSync = syncPeriodic 473 case <-c.shutdownCh: 474 reasonForSync = syncShutdown 475 // Cancel check watcher but sync one last time 476 cancel() 477 case ops := <-c.opCh: 478 reasonForSync = syncNewOps 479 c.merge(ops) 480 } 481 482 if err := c.sync(reasonForSync); err != nil { 483 if failures == 0 { 484 // Log on the first failure 485 c.logger.Warn("failed to update services in Consul", "error", err) 486 } else if failures%10 == 0 { 487 // Log every 10th consecutive failure 488 c.logger.Error("still unable to update services in Consul", "failures", failures, "error", err) 489 } 490 491 failures++ 492 if !retryTimer.Stop() { 493 // Timer already expired, since the timer may 494 // or may not have been read in the select{} 495 // above, conditionally receive on it 496 select { 497 case <-retryTimer.C: 498 default: 499 } 500 } 501 backoff := c.retryInterval * time.Duration(failures) 502 if backoff > c.maxRetryInterval { 503 backoff = c.maxRetryInterval 504 } 505 retryTimer.Reset(backoff) 506 } else { 507 if failures > 0 { 508 c.logger.Info("successfully updated services in Consul") 509 failures = 0 510 } 511 512 // on successful sync, clear deregistered consul entities 513 c.clearExplicitlyDeregistered() 514 515 // Reset timer to periodic interval to periodically 516 // reconile with Consul 517 if !retryTimer.Stop() { 518 select { 519 case <-retryTimer.C: 520 default: 521 } 522 } 523 retryTimer.Reset(c.periodicInterval) 524 } 525 526 select { 527 case <-c.shutdownCh: 528 // Exit only after sync'ing all outstanding operations 529 if len(c.opCh) > 0 { 530 for len(c.opCh) > 0 { 531 c.merge(<-c.opCh) 532 } 533 continue 534 } 535 return 536 default: 537 } 538 539 } 540 } 541 542 // commit operations unless already shutting down. 
func (c *ServiceClient) commit(ops *operations) {
	// Blocks until the operations are queued or shutdown has been signalled,
	// whichever happens first; in the latter case ops is dropped.
	select {
	case c.opCh <- ops:
	case <-c.shutdownCh:
	}
}

// clearExplicitlyDeregistered forgets every service and check ID that was
// recorded as explicitly deregistered by merge.
func (c *ServiceClient) clearExplicitlyDeregistered() {
	c.explicitlyDeregisteredServices = map[string]bool{}
	c.explicitlyDeregisteredChecks = map[string]bool{}
}

// merge registrations into state map prior to sync'ing with Consul
//
// Registrations overwrite any entry with the same ID. Deregistrations remove
// the entry and record the ID as explicitly deregistered, which lets sync
// remove it from Consul even during the deregistration probation period.
func (c *ServiceClient) merge(ops *operations) {
	for _, s := range ops.regServices {
		c.services[s.ID] = s
	}
	for _, check := range ops.regChecks {
		c.checks[check.ID] = check
	}
	for _, sid := range ops.deregServices {
		delete(c.services, sid)
		c.explicitlyDeregisteredServices[sid] = true
	}
	for _, cid := range ops.deregChecks {
		delete(c.checks, cid)
		c.explicitlyDeregisteredChecks[cid] = true
	}
	// Publish the number of desired services and checks after the merge
	metrics.SetGauge([]string{"client", "consul", "services"}, float32(len(c.services)))
	metrics.SetGauge([]string{"client", "consul", "checks"}, float32(len(c.checks)))
}

// sync enqueued operations.
//
// sync reconciles the Consul agent with the desired state held in c.services
// and c.checks, in four passes: deregister unknown Nomad services, register
// missing or changed services, deregister unknown Nomad checks, register
// missing checks. It returns on the first Consul API error (deregistration
// failures for old-style Nomad service IDs are skipped instead), incrementing
// the sync_failure counter. reason is forwarded to
// agentServiceUpdateRequired when deciding whether a service changed.
func (c *ServiceClient) sync(reason syncReason) error {
	// Counters for the summary log line at the end
	sreg, creg, sdereg, cdereg := 0, 0, 0, 0

	consulServices, err := c.client.Services()
	if err != nil {
		metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
		return fmt.Errorf("error querying Consul services: %v", err)
	}

	consulChecks, err := c.client.Checks()
	if err != nil {
		metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
		return fmt.Errorf("error querying Consul checks: %v", err)
	}

	// While in probation, unknown entries are only removed if they were
	// explicitly deregistered.
	inProbation := time.Now().Before(c.deregisterProbationExpiry)

	// Remove Nomad services in Consul but unknown locally
	for id := range consulServices {
		if _, ok := c.services[id]; ok {
			// Known service, skip
			continue
		}

		// Ignore if this is not a Nomad managed service. Also ignore
		// Nomad managed services if this is not a client agent.
		// This is to prevent server agents from removing services
		// registered by client agents
		if !isNomadService(id) || !c.isClientAgent {
			// Not managed by Nomad, skip
			continue
		}

		// Ignore unknown services during probation
		if inProbation && !c.explicitlyDeregisteredServices[id] {
			continue
		}

		// Ignore if this is a service for a Nomad managed sidecar proxy.
		if isNomadSidecar(id, c.services) {
			continue
		}

		// Unknown Nomad managed service; kill
		if err := c.client.ServiceDeregister(id); err != nil {
			if isOldNomadService(id) {
				// Don't hard-fail on old entries. See #3620
				continue
			}

			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		sdereg++
		metrics.IncrCounter([]string{"client", "consul", "service_deregistrations"}, 1)
	}

	// Add Nomad services missing from Consul, or where the service has been updated.
	for id, serviceInNomad := range c.services {

		serviceInConsul, exists := consulServices[id]
		sidecarInConsul := getNomadSidecar(id, consulServices)

		// The update check only runs when the service exists, so
		// serviceInConsul is non-nil whenever it is inspected.
		if !exists || agentServiceUpdateRequired(reason, serviceInNomad, serviceInConsul, sidecarInConsul) {
			if err = c.client.ServiceRegister(serviceInNomad); err != nil {
				metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
				return err
			}
			sreg++
			metrics.IncrCounter([]string{"client", "consul", "service_registrations"}, 1)
		}

	}

	// Remove Nomad checks in Consul but unknown locally
	for id, check := range consulChecks {
		if _, ok := c.checks[id]; ok {
			// Known check, leave it
			continue
		}

		// Ignore if this is not a Nomad managed check. Also ignore
		// Nomad managed checks if this is not a client agent.
		// This is to prevent server agents from removing checks
		// registered by client agents
		if !isNomadService(check.ServiceID) || !c.isClientAgent || !isNomadCheck(check.CheckID) {
			// Service not managed by Nomad, skip
			continue
		}

		// Ignore unknown checks during probation
		if inProbation && !c.explicitlyDeregisteredChecks[id] {
			continue
		}

		// Ignore if this is a check for a Nomad managed sidecar proxy.
		if isNomadSidecar(check.ServiceID, c.services) {
			continue
		}

		// Unknown Nomad managed check; remove
		if err := c.client.CheckDeregister(id); err != nil {
			if isOldNomadService(check.ServiceID) {
				// Don't hard-fail on old entries.
				continue
			}

			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		cdereg++
		metrics.IncrCounter([]string{"client", "consul", "check_deregistrations"}, 1)
	}

	// Add Nomad checks missing from Consul. Checks already present are not
	// compared or updated here.
	for id, check := range c.checks {
		if _, ok := consulChecks[id]; ok {
			// Already in Consul; skipping
			continue
		}

		if err := c.client.CheckRegister(check); err != nil {
			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		creg++
		metrics.IncrCounter([]string{"client", "consul", "check_registrations"}, 1)
	}

	// Only log if something was actually synced
	if sreg > 0 || sdereg > 0 || creg > 0 || cdereg > 0 {
		c.logger.Debug("sync complete", "registered_services", sreg, "deregistered_services", sdereg,
			"registered_checks", creg, "deregistered_checks", cdereg)
	}
	return nil
}

// RegisterAgent registers Nomad agents (client or server). The
// Service.PortLabel should be a literal port to be parsed with SplitHostPort.
// Script checks are not supported and will return an error. Registration is
// asynchronous.
//
// Agents will be deregistered when Shutdown is called.
719 func (c *ServiceClient) RegisterAgent(role string, services []*structs.Service) error { 720 ops := operations{} 721 722 for _, service := range services { 723 id := makeAgentServiceID(role, service) 724 725 // Unlike tasks, agents don't use port labels. Agent ports are 726 // stored directly in the PortLabel. 727 host, rawport, err := net.SplitHostPort(service.PortLabel) 728 if err != nil { 729 return fmt.Errorf("error parsing port label %q from service %q: %v", service.PortLabel, service.Name, err) 730 } 731 port, err := strconv.Atoi(rawport) 732 if err != nil { 733 return fmt.Errorf("error parsing port %q from service %q: %v", rawport, service.Name, err) 734 } 735 serviceReg := &api.AgentServiceRegistration{ 736 ID: id, 737 Name: service.Name, 738 Tags: service.Tags, 739 Address: host, 740 Port: port, 741 // This enables the consul UI to show that Nomad registered this service 742 Meta: map[string]string{ 743 "external-source": "nomad", 744 }, 745 } 746 ops.regServices = append(ops.regServices, serviceReg) 747 748 for _, check := range service.Checks { 749 checkID := MakeCheckID(id, check) 750 if check.Type == structs.ServiceCheckScript { 751 return fmt.Errorf("service %q contains invalid check: agent checks do not support scripts", service.Name) 752 } 753 checkHost, checkPort := serviceReg.Address, serviceReg.Port 754 if check.PortLabel != "" { 755 // Unlike tasks, agents don't use port labels. Agent ports are 756 // stored directly in the PortLabel. 
757 host, rawport, err := net.SplitHostPort(check.PortLabel) 758 if err != nil { 759 return fmt.Errorf("error parsing port label %q from check %q: %v", service.PortLabel, check.Name, err) 760 } 761 port, err := strconv.Atoi(rawport) 762 if err != nil { 763 return fmt.Errorf("error parsing port %q from check %q: %v", rawport, check.Name, err) 764 } 765 checkHost, checkPort = host, port 766 } 767 checkReg, err := createCheckReg(id, checkID, check, checkHost, checkPort) 768 if err != nil { 769 return fmt.Errorf("failed to add check %q: %v", check.Name, err) 770 } 771 ops.regChecks = append(ops.regChecks, checkReg) 772 } 773 } 774 775 // Don't bother committing agent checks if we're already shutting down 776 c.agentLock.Lock() 777 defer c.agentLock.Unlock() 778 select { 779 case <-c.shutdownCh: 780 return nil 781 default: 782 } 783 784 // Now add them to the registration queue 785 c.commit(&ops) 786 787 // Record IDs for deregistering on shutdown 788 for _, id := range ops.regServices { 789 c.agentServices[id.ID] = struct{}{} 790 } 791 for _, id := range ops.regChecks { 792 c.agentChecks[id.ID] = struct{}{} 793 } 794 return nil 795 } 796 797 // serviceRegs creates service registrations, check registrations, and script 798 // checks from a service. It returns a service registration object with the 799 // service and check IDs populated. 
800 func (c *ServiceClient) serviceRegs(ops *operations, service *structs.Service, workload *WorkloadServices) ( 801 *ServiceRegistration, error) { 802 803 // Get the services ID 804 id := MakeAllocServiceID(workload.AllocID, workload.Name(), service) 805 sreg := &ServiceRegistration{ 806 serviceID: id, 807 checkIDs: make(map[string]struct{}, len(service.Checks)), 808 } 809 810 // Service address modes default to auto 811 addrMode := service.AddressMode 812 if addrMode == "" { 813 addrMode = structs.AddressModeAuto 814 } 815 816 // Determine the address to advertise based on the mode 817 ip, port, err := getAddress(addrMode, service.PortLabel, workload.Networks, workload.DriverNetwork) 818 if err != nil { 819 return nil, fmt.Errorf("unable to get address for service %q: %v", service.Name, err) 820 } 821 822 // Determine whether to use tags or canary_tags 823 var tags []string 824 if workload.Canary && len(service.CanaryTags) > 0 { 825 tags = make([]string, len(service.CanaryTags)) 826 copy(tags, service.CanaryTags) 827 } else { 828 tags = make([]string, len(service.Tags)) 829 copy(tags, service.Tags) 830 } 831 832 // newConnect returns (nil, nil) if there's no Connect-enabled service. 
833 connect, err := newConnect(service.Name, service.Connect, workload.Networks) 834 if err != nil { 835 return nil, fmt.Errorf("invalid Consul Connect configuration for service %q: %v", service.Name, err) 836 } 837 838 // Determine whether to use meta or canary_meta 839 var meta map[string]string 840 if workload.Canary && len(service.CanaryMeta) > 0 { 841 meta = make(map[string]string, len(service.CanaryMeta)+1) 842 for k, v := range service.CanaryMeta { 843 meta[k] = v 844 } 845 } else { 846 meta = make(map[string]string, len(service.Meta)+1) 847 for k, v := range service.Meta { 848 meta[k] = v 849 } 850 } 851 852 // This enables the consul UI to show that Nomad registered this service 853 meta["external-source"] = "nomad" 854 855 // Build the Consul Service registration request 856 serviceReg := &api.AgentServiceRegistration{ 857 ID: id, 858 Name: service.Name, 859 Tags: tags, 860 EnableTagOverride: service.EnableTagOverride, 861 Address: ip, 862 Port: port, 863 Meta: meta, 864 Connect: connect, // will be nil if no Connect stanza 865 } 866 ops.regServices = append(ops.regServices, serviceReg) 867 868 // Build the check registrations 869 checkIDs, err := c.checkRegs(ops, id, service, workload) 870 if err != nil { 871 return nil, err 872 } 873 for _, cid := range checkIDs { 874 sreg.checkIDs[cid] = struct{}{} 875 } 876 return sreg, nil 877 } 878 879 // checkRegs registers the checks for the given service and returns the 880 // registered check ids. 
881 func (c *ServiceClient) checkRegs(ops *operations, serviceID string, service *structs.Service, 882 workload *WorkloadServices) ([]string, error) { 883 884 // Fast path 885 numChecks := len(service.Checks) 886 if numChecks == 0 { 887 return nil, nil 888 } 889 890 checkIDs := make([]string, 0, numChecks) 891 for _, check := range service.Checks { 892 checkID := MakeCheckID(serviceID, check) 893 checkIDs = append(checkIDs, checkID) 894 if check.Type == structs.ServiceCheckScript { 895 // Skip getAddress for script checks 896 checkReg, err := createCheckReg(serviceID, checkID, check, "", 0) 897 if err != nil { 898 return nil, fmt.Errorf("failed to add script check %q: %v", check.Name, err) 899 } 900 ops.regChecks = append(ops.regChecks, checkReg) 901 continue 902 } 903 904 // Default to the service's port but allow check to override 905 portLabel := check.PortLabel 906 if portLabel == "" { 907 // Default to the service's port label 908 portLabel = service.PortLabel 909 } 910 911 // Checks address mode defaults to host for pre-#3380 backward compat 912 addrMode := check.AddressMode 913 if addrMode == "" { 914 addrMode = structs.AddressModeHost 915 } 916 917 ip, port, err := getAddress(addrMode, portLabel, workload.Networks, workload.DriverNetwork) 918 if err != nil { 919 return nil, fmt.Errorf("error getting address for check %q: %v", check.Name, err) 920 } 921 922 checkReg, err := createCheckReg(serviceID, checkID, check, ip, port) 923 if err != nil { 924 return nil, fmt.Errorf("failed to add check %q: %v", check.Name, err) 925 } 926 ops.regChecks = append(ops.regChecks, checkReg) 927 } 928 return checkIDs, nil 929 } 930 931 // RegisterWorkload with Consul. Adds all service entries and checks to Consul. 932 // 933 // If the service IP is set it used as the address in the service registration. 934 // Checks will always use the IP from the Task struct (host's IP). 935 // 936 // Actual communication with Consul is done asynchronously (see Run). 
937 func (c *ServiceClient) RegisterWorkload(workload *WorkloadServices) error { 938 // Fast path 939 numServices := len(workload.Services) 940 if numServices == 0 { 941 return nil 942 } 943 944 t := new(ServiceRegistrations) 945 t.Services = make(map[string]*ServiceRegistration, numServices) 946 947 ops := &operations{} 948 for _, service := range workload.Services { 949 sreg, err := c.serviceRegs(ops, service, workload) 950 if err != nil { 951 return err 952 } 953 t.Services[sreg.serviceID] = sreg 954 } 955 956 // Add the workload to the allocation's registration 957 c.addRegistrations(workload.AllocID, workload.Name(), t) 958 959 c.commit(ops) 960 961 // Start watching checks. Done after service registrations are built 962 // since an error building them could leak watches. 963 for _, service := range workload.Services { 964 serviceID := MakeAllocServiceID(workload.AllocID, workload.Name(), service) 965 for _, check := range service.Checks { 966 if check.TriggersRestarts() { 967 checkID := MakeCheckID(serviceID, check) 968 c.checkWatcher.Watch(workload.AllocID, workload.Name(), checkID, check, workload.Restarter) 969 } 970 } 971 } 972 return nil 973 } 974 975 // UpdateWorkload in Consul. Does not alter the service if only checks have 976 // changed. 977 // 978 // DriverNetwork must not change between invocations for the same allocation. 
979 func (c *ServiceClient) UpdateWorkload(old, newWorkload *WorkloadServices) error { 980 ops := new(operations) 981 regs := new(ServiceRegistrations) 982 regs.Services = make(map[string]*ServiceRegistration, len(newWorkload.Services)) 983 984 existingIDs := make(map[string]*structs.Service, len(old.Services)) 985 for _, s := range old.Services { 986 existingIDs[MakeAllocServiceID(old.AllocID, old.Name(), s)] = s 987 } 988 newIDs := make(map[string]*structs.Service, len(newWorkload.Services)) 989 for _, s := range newWorkload.Services { 990 newIDs[MakeAllocServiceID(newWorkload.AllocID, newWorkload.Name(), s)] = s 991 } 992 993 // Loop over existing Service IDs to see if they have been removed 994 for existingID, existingSvc := range existingIDs { 995 newSvc, ok := newIDs[existingID] 996 997 if !ok { 998 // Existing service entry removed 999 ops.deregServices = append(ops.deregServices, existingID) 1000 for _, check := range existingSvc.Checks { 1001 cid := MakeCheckID(existingID, check) 1002 ops.deregChecks = append(ops.deregChecks, cid) 1003 1004 // Unwatch watched checks 1005 if check.TriggersRestarts() { 1006 c.checkWatcher.Unwatch(cid) 1007 } 1008 } 1009 continue 1010 } 1011 1012 oldHash := existingSvc.Hash(old.AllocID, old.Name(), old.Canary) 1013 newHash := newSvc.Hash(newWorkload.AllocID, newWorkload.Name(), newWorkload.Canary) 1014 if oldHash == newHash { 1015 // Service exists and hasn't changed, don't re-add it later 1016 delete(newIDs, existingID) 1017 } 1018 1019 // Service still exists so add it to the task's registration 1020 sreg := &ServiceRegistration{ 1021 serviceID: existingID, 1022 checkIDs: make(map[string]struct{}, len(newSvc.Checks)), 1023 } 1024 regs.Services[existingID] = sreg 1025 1026 // See if any checks were updated 1027 existingChecks := make(map[string]*structs.ServiceCheck, len(existingSvc.Checks)) 1028 for _, check := range existingSvc.Checks { 1029 existingChecks[MakeCheckID(existingID, check)] = check 1030 } 1031 1032 // 
Register new checks 1033 for _, check := range newSvc.Checks { 1034 checkID := MakeCheckID(existingID, check) 1035 if _, exists := existingChecks[checkID]; exists { 1036 // Check is still required. Remove it from the map so it doesn't get 1037 // deleted later. 1038 delete(existingChecks, checkID) 1039 sreg.checkIDs[checkID] = struct{}{} 1040 } 1041 1042 // New check on an unchanged service; add them now 1043 newCheckIDs, err := c.checkRegs(ops, existingID, newSvc, newWorkload) 1044 if err != nil { 1045 return err 1046 } 1047 1048 for _, checkID := range newCheckIDs { 1049 sreg.checkIDs[checkID] = struct{}{} 1050 } 1051 1052 // Update all watched checks as CheckRestart fields aren't part of ID 1053 if check.TriggersRestarts() { 1054 c.checkWatcher.Watch(newWorkload.AllocID, newWorkload.Name(), checkID, check, newWorkload.Restarter) 1055 } 1056 } 1057 1058 // Remove existing checks not in updated service 1059 for cid, check := range existingChecks { 1060 ops.deregChecks = append(ops.deregChecks, cid) 1061 1062 // Unwatch checks 1063 if check.TriggersRestarts() { 1064 c.checkWatcher.Unwatch(cid) 1065 } 1066 } 1067 } 1068 1069 // Any remaining services should just be enqueued directly 1070 for _, newSvc := range newIDs { 1071 sreg, err := c.serviceRegs(ops, newSvc, newWorkload) 1072 if err != nil { 1073 return err 1074 } 1075 1076 regs.Services[sreg.serviceID] = sreg 1077 } 1078 1079 // Add the task to the allocation's registration 1080 c.addRegistrations(newWorkload.AllocID, newWorkload.Name(), regs) 1081 1082 c.commit(ops) 1083 1084 // Start watching checks. Done after service registrations are built 1085 // since an error building them could leak watches. 
1086 for _, service := range newIDs { 1087 serviceID := MakeAllocServiceID(newWorkload.AllocID, newWorkload.Name(), service) 1088 for _, check := range service.Checks { 1089 if check.TriggersRestarts() { 1090 checkID := MakeCheckID(serviceID, check) 1091 c.checkWatcher.Watch(newWorkload.AllocID, newWorkload.Name(), checkID, check, newWorkload.Restarter) 1092 } 1093 } 1094 } 1095 1096 return nil 1097 } 1098 1099 // RemoveWorkload from Consul. Removes all service entries and checks. 1100 // 1101 // Actual communication with Consul is done asynchronously (see Run). 1102 func (c *ServiceClient) RemoveWorkload(workload *WorkloadServices) { 1103 ops := operations{} 1104 1105 for _, service := range workload.Services { 1106 id := MakeAllocServiceID(workload.AllocID, workload.Name(), service) 1107 ops.deregServices = append(ops.deregServices, id) 1108 1109 for _, check := range service.Checks { 1110 cid := MakeCheckID(id, check) 1111 ops.deregChecks = append(ops.deregChecks, cid) 1112 1113 if check.TriggersRestarts() { 1114 c.checkWatcher.Unwatch(cid) 1115 } 1116 } 1117 } 1118 1119 // Remove the workload from the alloc's registrations 1120 c.removeRegistration(workload.AllocID, workload.Name()) 1121 1122 // Now add them to the deregistration fields; main Run loop will update 1123 c.commit(&ops) 1124 } 1125 1126 // AllocRegistrations returns the registrations for the given allocation. If the 1127 // allocation has no reservations, the response is a nil object. 
1128 func (c *ServiceClient) AllocRegistrations(allocID string) (*AllocRegistration, error) { 1129 // Get the internal struct using the lock 1130 c.allocRegistrationsLock.RLock() 1131 regInternal, ok := c.allocRegistrations[allocID] 1132 if !ok { 1133 c.allocRegistrationsLock.RUnlock() 1134 return nil, nil 1135 } 1136 1137 // Copy so we don't expose internal structs 1138 reg := regInternal.copy() 1139 c.allocRegistrationsLock.RUnlock() 1140 1141 // Query the services and checks to populate the allocation registrations. 1142 services, err := c.client.Services() 1143 if err != nil { 1144 return nil, err 1145 } 1146 1147 checks, err := c.client.Checks() 1148 if err != nil { 1149 return nil, err 1150 } 1151 1152 // Populate the object 1153 for _, treg := range reg.Tasks { 1154 for serviceID, sreg := range treg.Services { 1155 sreg.Service = services[serviceID] 1156 for checkID := range sreg.checkIDs { 1157 if check, ok := checks[checkID]; ok { 1158 sreg.Checks = append(sreg.Checks, check) 1159 } 1160 } 1161 } 1162 } 1163 1164 return reg, nil 1165 } 1166 1167 // UpdateTTL is used to update the TTL of a check. Typically this will only be 1168 // called to heartbeat script checks. 1169 func (c *ServiceClient) UpdateTTL(id, output, status string) error { 1170 return c.client.UpdateTTL(id, output, status) 1171 } 1172 1173 // Shutdown the Consul client. Update running task registrations and deregister 1174 // agent from Consul. On first call blocks up to shutdownWait before giving up 1175 // on syncing operations. 1176 func (c *ServiceClient) Shutdown() error { 1177 // Serialize Shutdown calls with RegisterAgent to prevent leaking agent 1178 // entries. 
1179 c.agentLock.Lock() 1180 defer c.agentLock.Unlock() 1181 select { 1182 case <-c.shutdownCh: 1183 return nil 1184 default: 1185 close(c.shutdownCh) 1186 } 1187 1188 // Give run loop time to sync, but don't block indefinitely 1189 deadline := time.After(c.shutdownWait) 1190 1191 // Wait for Run to finish any outstanding operations and exit 1192 select { 1193 case <-c.exitCh: 1194 case <-deadline: 1195 // Don't wait forever though 1196 } 1197 1198 // If Consul was never seen nothing could be written so exit early 1199 if !c.hasSeen() { 1200 return nil 1201 } 1202 1203 // Always attempt to deregister Nomad agent Consul entries, even if 1204 // deadline was reached 1205 for id := range c.agentServices { 1206 if err := c.client.ServiceDeregister(id); err != nil { 1207 c.logger.Error("failed deregistering agent service", "service_id", id, "error", err) 1208 } 1209 } 1210 for id := range c.agentChecks { 1211 if err := c.client.CheckDeregister(id); err != nil { 1212 c.logger.Error("failed deregistering agent check", "check_id", id, "error", err) 1213 } 1214 } 1215 1216 return nil 1217 } 1218 1219 // addRegistration adds the service registrations for the given allocation. 1220 func (c *ServiceClient) addRegistrations(allocID, taskName string, reg *ServiceRegistrations) { 1221 c.allocRegistrationsLock.Lock() 1222 defer c.allocRegistrationsLock.Unlock() 1223 1224 alloc, ok := c.allocRegistrations[allocID] 1225 if !ok { 1226 alloc = &AllocRegistration{ 1227 Tasks: make(map[string]*ServiceRegistrations), 1228 } 1229 c.allocRegistrations[allocID] = alloc 1230 } 1231 alloc.Tasks[taskName] = reg 1232 } 1233 1234 // removeRegistrations removes the registration for the given allocation. 
1235 func (c *ServiceClient) removeRegistration(allocID, taskName string) { 1236 c.allocRegistrationsLock.Lock() 1237 defer c.allocRegistrationsLock.Unlock() 1238 1239 alloc, ok := c.allocRegistrations[allocID] 1240 if !ok { 1241 return 1242 } 1243 1244 // Delete the task and if it is the last one also delete the alloc's 1245 // registration 1246 delete(alloc.Tasks, taskName) 1247 if len(alloc.Tasks) == 0 { 1248 delete(c.allocRegistrations, allocID) 1249 } 1250 } 1251 1252 // makeAgentServiceID creates a unique ID for identifying an agent service in 1253 // Consul. 1254 // 1255 // Agent service IDs are of the form: 1256 // 1257 // {nomadServicePrefix}-{ROLE}-b32(sha1({Service.Name}-{Service.Tags...}) 1258 // Example Server ID: _nomad-server-fbbk265qn4tmt25nd4ep42tjvmyj3hr4 1259 // Example Client ID: _nomad-client-ggnjpgl7yn7rgmvxzilmpvrzzvrszc7l 1260 // 1261 func makeAgentServiceID(role string, service *structs.Service) string { 1262 return fmt.Sprintf("%s-%s-%s", nomadServicePrefix, role, service.Hash(role, "", false)) 1263 } 1264 1265 // MakeAllocServiceID creates a unique ID for identifying an alloc service in 1266 // Consul. 1267 // 1268 // Example Service ID: _nomad-task-b4e61df9-b095-d64e-f241-23860da1375f-redis-http-http 1269 func MakeAllocServiceID(allocID, taskName string, service *structs.Service) string { 1270 return fmt.Sprintf("%s%s-%s-%s-%s", nomadTaskPrefix, allocID, taskName, service.Name, service.PortLabel) 1271 } 1272 1273 // MakeCheckID creates a unique ID for a check. 1274 // 1275 // Example Check ID: _nomad-check-434ae42f9a57c5705344974ac38de2aee0ee089d 1276 func MakeCheckID(serviceID string, check *structs.ServiceCheck) string { 1277 return fmt.Sprintf("%s%s", nomadCheckPrefix, check.Hash(serviceID)) 1278 } 1279 1280 // createCheckReg creates a Check that can be registered with Consul. 1281 // 1282 // Script checks simply have a TTL set and the caller is responsible for 1283 // running the script and heart-beating. 
1284 func createCheckReg(serviceID, checkID string, check *structs.ServiceCheck, host string, port int) (*api.AgentCheckRegistration, error) { 1285 chkReg := api.AgentCheckRegistration{ 1286 ID: checkID, 1287 Name: check.Name, 1288 ServiceID: serviceID, 1289 } 1290 chkReg.Status = check.InitialStatus 1291 chkReg.Timeout = check.Timeout.String() 1292 chkReg.Interval = check.Interval.String() 1293 1294 // Require an address for http or tcp checks 1295 if port == 0 && check.RequiresPort() { 1296 return nil, fmt.Errorf("%s checks require an address", check.Type) 1297 } 1298 1299 switch check.Type { 1300 case structs.ServiceCheckHTTP: 1301 proto := check.Protocol 1302 if proto == "" { 1303 proto = "http" 1304 } 1305 if check.TLSSkipVerify { 1306 chkReg.TLSSkipVerify = true 1307 } 1308 base := url.URL{ 1309 Scheme: proto, 1310 Host: net.JoinHostPort(host, strconv.Itoa(port)), 1311 } 1312 relative, err := url.Parse(check.Path) 1313 if err != nil { 1314 return nil, err 1315 } 1316 checkURL := base.ResolveReference(relative) 1317 chkReg.HTTP = checkURL.String() 1318 chkReg.Method = check.Method 1319 chkReg.Header = check.Header 1320 1321 case structs.ServiceCheckTCP: 1322 chkReg.TCP = net.JoinHostPort(host, strconv.Itoa(port)) 1323 1324 case structs.ServiceCheckScript: 1325 chkReg.TTL = (check.Interval + ttlCheckBuffer).String() 1326 // As of Consul 1.0.0 setting TTL and Interval is a 400 1327 chkReg.Interval = "" 1328 1329 case structs.ServiceCheckGRPC: 1330 chkReg.GRPC = fmt.Sprintf("%s/%s", net.JoinHostPort(host, strconv.Itoa(port)), check.GRPCService) 1331 chkReg.GRPCUseTLS = check.GRPCUseTLS 1332 if check.TLSSkipVerify { 1333 chkReg.TLSSkipVerify = true 1334 } 1335 1336 default: 1337 return nil, fmt.Errorf("check type %+q not valid", check.Type) 1338 } 1339 return &chkReg, nil 1340 } 1341 1342 // isNomadCheck returns true if the ID matches the pattern of a Nomad managed 1343 // check. 
1344 func isNomadCheck(id string) bool { 1345 return strings.HasPrefix(id, nomadCheckPrefix) 1346 } 1347 1348 // isNomadService returns true if the ID matches the pattern of a Nomad managed 1349 // service (new or old formats). Agent services return false as independent 1350 // client and server agents may be running on the same machine. #2827 1351 func isNomadService(id string) bool { 1352 return strings.HasPrefix(id, nomadTaskPrefix) || isOldNomadService(id) 1353 } 1354 1355 // isOldNomadService returns true if the ID matches an old pattern managed by 1356 // Nomad. 1357 // 1358 // Pre-0.7.1 task service IDs are of the form: 1359 // 1360 // {nomadServicePrefix}-executor-{ALLOC_ID}-{Service.Name}-{Service.Tags...} 1361 // Example Service ID: _nomad-executor-1234-echo-http-tag1-tag2-tag3 1362 // 1363 func isOldNomadService(id string) bool { 1364 const prefix = nomadServicePrefix + "-executor" 1365 return strings.HasPrefix(id, prefix) 1366 } 1367 1368 const ( 1369 sidecarSuffix = "-sidecar-proxy" 1370 ) 1371 1372 // isNomadSidecar returns true if the ID matches a sidecar proxy for a Nomad 1373 // managed service. 1374 // 1375 // For example if you have a Connect enabled service with the ID: 1376 // 1377 // _nomad-task-5229c7f8-376b-3ccc-edd9-981e238f7033-cache-redis-cache-db 1378 // 1379 // Consul will create a service for the sidecar proxy with the ID: 1380 // 1381 // _nomad-task-5229c7f8-376b-3ccc-edd9-981e238f7033-cache-redis-cache-db-sidecar-proxy 1382 // 1383 func isNomadSidecar(id string, services map[string]*api.AgentServiceRegistration) bool { 1384 if !strings.HasSuffix(id, sidecarSuffix) { 1385 return false 1386 } 1387 1388 // Make sure the Nomad managed service for this proxy still exists. 1389 _, ok := services[id[:len(id)-len(sidecarSuffix)]] 1390 return ok 1391 } 1392 1393 // getNomadSidecar returns the service registration of the sidecar for the managed 1394 // service with the specified id. 
1395 // 1396 // If the managed service of the specified id does not exist, or the service does 1397 // not have a sidecar proxy, nil is returned. 1398 func getNomadSidecar(id string, services map[string]*api.AgentService) *api.AgentService { 1399 if _, exists := services[id]; !exists { 1400 return nil 1401 } 1402 1403 sidecarID := id + sidecarSuffix 1404 return services[sidecarID] 1405 } 1406 1407 // getAddress returns the IP and port to use for a service or check. If no port 1408 // label is specified (an empty value), zero values are returned because no 1409 // address could be resolved. 1410 func getAddress(addrMode, portLabel string, networks structs.Networks, driverNet *drivers.DriverNetwork) (string, int, error) { 1411 switch addrMode { 1412 case structs.AddressModeAuto: 1413 if driverNet.Advertise() { 1414 addrMode = structs.AddressModeDriver 1415 } else { 1416 addrMode = structs.AddressModeHost 1417 } 1418 return getAddress(addrMode, portLabel, networks, driverNet) 1419 case structs.AddressModeHost: 1420 if portLabel == "" { 1421 if len(networks) != 1 { 1422 // If no networks are specified return zero 1423 // values. Consul will advertise the host IP 1424 // with no port. This is the pre-0.7.1 behavior 1425 // some people rely on. 
1426 return "", 0, nil 1427 } 1428 1429 return networks[0].IP, 0, nil 1430 } 1431 1432 // Default path: use host ip:port 1433 ip, port := networks.Port(portLabel) 1434 if ip == "" && port <= 0 { 1435 return "", 0, fmt.Errorf("invalid port %q: port label not found", portLabel) 1436 } 1437 return ip, port, nil 1438 1439 case structs.AddressModeDriver: 1440 // Require a driver network if driver address mode is used 1441 if driverNet == nil { 1442 return "", 0, fmt.Errorf(`cannot use address_mode="driver": no driver network exists`) 1443 } 1444 1445 // If no port label is specified just return the IP 1446 if portLabel == "" { 1447 return driverNet.IP, 0, nil 1448 } 1449 1450 // If the port is a label, use the driver's port (not the host's) 1451 if port, ok := driverNet.PortMap[portLabel]; ok { 1452 return driverNet.IP, port, nil 1453 } 1454 1455 // If port isn't a label, try to parse it as a literal port number 1456 port, err := strconv.Atoi(portLabel) 1457 if err != nil { 1458 // Don't include Atoi error message as user likely 1459 // never intended it to be a numeric and it creates a 1460 // confusing error message 1461 return "", 0, fmt.Errorf("invalid port label %q: port labels in driver address_mode must be numeric or in the driver's port map", portLabel) 1462 } 1463 if port <= 0 { 1464 return "", 0, fmt.Errorf("invalid port: %q: port must be >0", portLabel) 1465 } 1466 1467 return driverNet.IP, port, nil 1468 1469 default: 1470 // Shouldn't happen due to validation, but enforce invariants 1471 return "", 0, fmt.Errorf("invalid address mode %q", addrMode) 1472 } 1473 }