// Source: github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/command/agent/consul/service_client.go

package consul

import (
	"context"
	"fmt"
	"net"
	"net/url"
	"reflect"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	metrics "github.com/armon/go-metrics"
	log "github.com/hashicorp/go-hclog"

	"github.com/hashicorp/consul/api"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/drivers"
)

const (
	// nomadServicePrefix is the prefix that scopes all Nomad registered
	// services (both agent and task entries).
	nomadServicePrefix = "_nomad"

	// nomadTaskPrefix is the prefix that scopes Nomad registered services
	// for tasks.
	nomadTaskPrefix = nomadServicePrefix + "-task-"

	// nomadCheckPrefix is the prefix that scopes Nomad registered checks for
	// services.
	nomadCheckPrefix = nomadServicePrefix + "-check-"

	// defaultRetryInterval is how quickly to retry syncing services and
	// checks to Consul when an error occurs. Will backoff up to a max.
	defaultRetryInterval = time.Second

	// defaultMaxRetryInterval is the default max retry interval.
	defaultMaxRetryInterval = 30 * time.Second

	// defaultPeriodicInterval is the interval at which the service
	// client reconciles state between the desired services and checks and
	// what's actually registered in Consul. This is done at an interval,
	// rather than being purely edge triggered, to handle the case that the
	// Consul agent's state may change underneath us.
	defaultPeriodicInterval = 30 * time.Second

	// ttlCheckBuffer is the time interval that Nomad can take to report the
	// check result to Consul.
	ttlCheckBuffer = 31 * time.Second

	// defaultShutdownWait is how long Shutdown() should block waiting for
	// enqueued operations to sync to Consul by default.
	defaultShutdownWait = time.Minute

	// DefaultQueryWaitDuration is the max duration the Consul Agent will
	// spend waiting for a response from a Consul Query.
	DefaultQueryWaitDuration = 2 * time.Second

	// ServiceTagHTTP is the tag assigned to HTTP services.
	ServiceTagHTTP = "http"

	// ServiceTagRPC is the tag assigned to RPC services.
	ServiceTagRPC = "rpc"

	// ServiceTagSerf is the tag assigned to Serf services.
	ServiceTagSerf = "serf"

	// deregisterProbationPeriod is the initialization period where
	// services registered in Consul but not in Nomad don't get deregistered,
	// to allow for nomad restoring tasks.
	deregisterProbationPeriod = time.Minute
)

// Additional Consul ACLs required
// - Consul Template: key:read
//   Used in tasks with template stanza that use Consul keys.

// CatalogAPI is the consul/api.Catalog API used by Nomad.
//
// ACL requirements
// - node:read (listing datacenters)
// - service:read
type CatalogAPI interface {
	Datacenters() ([]string, error)
	Service(service, tag string, q *api.QueryOptions) ([]*api.CatalogService, *api.QueryMeta, error)
}

// AgentAPI is the consul/api.Agent API used by Nomad.
//
// ACL requirements
// - agent:read
// - service:write
type AgentAPI interface {
	Services() (map[string]*api.AgentService, error)
	Checks() (map[string]*api.AgentCheck, error)
	CheckRegister(check *api.AgentCheckRegistration) error
	CheckDeregister(checkID string) error
	Self() (map[string]map[string]interface{}, error)
	ServiceRegister(service *api.AgentServiceRegistration) error
	ServiceDeregister(serviceID string) error
	UpdateTTL(id, output, status string) error
}

// ConfigAPI is the consul/api.ConfigEntries API subset used by Nomad Server.
//
// ACL requirements
// - operator:write (server only)
type ConfigAPI interface {
	// Set is the only method of the subset that is currently declared; Delete
	// is left commented out below because it is not used.
	Set(entry api.ConfigEntry, w *api.WriteOptions) (bool, *api.WriteMeta, error)
	// Delete(kind, name string, w *api.WriteOptions) (*api.WriteMeta, error) (not used)
}

// ACLsAPI is the consul/api.ACL API subset used by Nomad Server.
//
// ACL requirements
// - acl:write (server only)
type ACLsAPI interface {
	// We are looking up by [operator token] SecretID, which implies we need
	// to use this method instead of the normal TokenRead, which can only be
	// used to lookup tokens by their AccessorID.
	TokenReadSelf(q *api.QueryOptions) (*api.ACLToken, *api.QueryMeta, error)
	PolicyRead(policyID string, q *api.QueryOptions) (*api.ACLPolicy, *api.QueryMeta, error)
	RoleRead(roleID string, q *api.QueryOptions) (*api.ACLRole, *api.QueryMeta, error)
	TokenCreate(partial *api.ACLToken, q *api.WriteOptions) (*api.ACLToken, *api.WriteMeta, error)
	TokenDelete(accessorID string, q *api.WriteOptions) (*api.WriteMeta, error)
	TokenList(q *api.QueryOptions) ([]*api.ACLTokenListEntry, *api.QueryMeta, error)
}

// agentServiceUpdateRequired checks if any critical fields in Nomad's version
// of a service definition are different from the existing service definition as
// known by Consul.
//
//  reason - The syncReason that triggered this synchronization with the consul
//           agent API.
//  wanted - Nomad's view of what the service definition is intended to be.
//           Not nil.
//  existing - Consul's view (agent, not catalog) of the actual service definition.
//           Not nil.
//  sidecar - Consul's view (agent, not catalog) of the service definition of the sidecar
//           associated with existing that may or may not exist.
//           May be nil.
146 func agentServiceUpdateRequired(reason syncReason, wanted *api.AgentServiceRegistration, existing *api.AgentService, sidecar *api.AgentService) bool { 147 switch reason { 148 case syncPeriodic: 149 // In a periodic sync with Consul, we need to respect the value of 150 // the enable_tag_override field so that we maintain the illusion that the 151 // user is in control of the Consul tags, as they may be externally edited 152 // via the Consul catalog API (e.g. a user manually sets them). 153 // 154 // As Consul does by disabling anti-entropy for the tags field, Nomad will 155 // ignore differences in the tags field during the periodic syncs with 156 // the Consul agent API. 157 // 158 // We do so by over-writing the nomad service registration by the value 159 // of the tags that Consul contains, if enable_tag_override = true. 160 maybeTweakTags(wanted, existing, sidecar) 161 return different(wanted, existing, sidecar) 162 163 default: 164 // A non-periodic sync with Consul indicates an operation has been set 165 // on the queue. This happens when service has been added / removed / modified 166 // and implies the Consul agent should be sync'd with nomad, because 167 // nomad is the ultimate source of truth for the service definition. 168 return different(wanted, existing, sidecar) 169 } 170 } 171 172 // maybeTweakTags will override wanted.Tags with a copy of existing.Tags only if 173 // EnableTagOverride is true. Otherwise the wanted service registration is left 174 // unchanged. 175 func maybeTweakTags(wanted *api.AgentServiceRegistration, existing *api.AgentService, sidecar *api.AgentService) { 176 if wanted.EnableTagOverride { 177 wanted.Tags = helper.CopySliceString(existing.Tags) 178 // If the service registration also defines a sidecar service, use the ETO 179 // setting for the parent service to also apply to the sidecar. 
180 if wanted.Connect != nil && wanted.Connect.SidecarService != nil { 181 if sidecar != nil { 182 wanted.Connect.SidecarService.Tags = helper.CopySliceString(sidecar.Tags) 183 } 184 } 185 } 186 } 187 188 // different compares the wanted state of the service registration with the actual 189 // (cached) state of the service registration reported by Consul. If any of the 190 // critical fields are not deeply equal, they considered different. 191 func different(wanted *api.AgentServiceRegistration, existing *api.AgentService, sidecar *api.AgentService) bool { 192 switch { 193 case wanted.Kind != existing.Kind: 194 return true 195 case wanted.ID != existing.ID: 196 return true 197 case wanted.Port != existing.Port: 198 return true 199 case wanted.Address != existing.Address: 200 return true 201 case wanted.Name != existing.Service: 202 return true 203 case wanted.EnableTagOverride != existing.EnableTagOverride: 204 return true 205 case !reflect.DeepEqual(wanted.Meta, existing.Meta): 206 return true 207 case tagsDifferent(wanted.Tags, existing.Tags): 208 return true 209 case connectSidecarDifferent(wanted, sidecar): 210 return true 211 } 212 return false 213 } 214 215 func tagsDifferent(a, b []string) bool { 216 if len(a) != len(b) { 217 return true 218 } 219 220 for i, valueA := range a { 221 if b[i] != valueA { 222 return true 223 } 224 } 225 226 return false 227 } 228 229 // sidecarTagsDifferent includes the special logic for comparing sidecar tags 230 // from Nomad vs. Consul perspective. Because Consul forces the sidecar tags 231 // to inherit the parent service tags if the sidecar tags are unset, we need to 232 // take that into consideration when Nomad's sidecar tags are unset by instead 233 // comparing them to the parent service tags. 
234 func sidecarTagsDifferent(parent, wanted, sidecar []string) bool { 235 if len(wanted) == 0 { 236 return tagsDifferent(parent, sidecar) 237 } 238 return tagsDifferent(wanted, sidecar) 239 } 240 241 // connectSidecarDifferent returns true if Nomad expects there to be a sidecar 242 // hanging off the desired parent service definition on the Consul side, and does 243 // not match with what Consul has. 244 func connectSidecarDifferent(wanted *api.AgentServiceRegistration, sidecar *api.AgentService) bool { 245 if wanted.Connect != nil && wanted.Connect.SidecarService != nil { 246 if sidecar == nil { 247 // consul lost our sidecar (?) 248 return true 249 } 250 251 if sidecarTagsDifferent(wanted.Tags, wanted.Connect.SidecarService.Tags, sidecar.Tags) { 252 // tags on the nomad definition have been modified 253 return true 254 } 255 } 256 257 // Either Nomad does not expect there to be a sidecar_service, or there is 258 // no actionable difference from the Consul sidecar_service definition. 259 return false 260 } 261 262 // operations are submitted to the main loop via commit() for synchronizing 263 // with Consul. 264 type operations struct { 265 regServices []*api.AgentServiceRegistration 266 regChecks []*api.AgentCheckRegistration 267 deregServices []string 268 deregChecks []string 269 } 270 271 // AllocRegistration holds the status of services registered for a particular 272 // allocations by task. 
273 type AllocRegistration struct { 274 // Tasks maps the name of a task to its registered services and checks 275 Tasks map[string]*ServiceRegistrations 276 } 277 278 func (a *AllocRegistration) copy() *AllocRegistration { 279 c := &AllocRegistration{ 280 Tasks: make(map[string]*ServiceRegistrations, len(a.Tasks)), 281 } 282 283 for k, v := range a.Tasks { 284 c.Tasks[k] = v.copy() 285 } 286 287 return c 288 } 289 290 // NumServices returns the number of registered services 291 func (a *AllocRegistration) NumServices() int { 292 if a == nil { 293 return 0 294 } 295 296 total := 0 297 for _, treg := range a.Tasks { 298 for _, sreg := range treg.Services { 299 if sreg.Service != nil { 300 total++ 301 } 302 } 303 } 304 305 return total 306 } 307 308 // NumChecks returns the number of registered checks 309 func (a *AllocRegistration) NumChecks() int { 310 if a == nil { 311 return 0 312 } 313 314 total := 0 315 for _, treg := range a.Tasks { 316 for _, sreg := range treg.Services { 317 total += len(sreg.Checks) 318 } 319 } 320 321 return total 322 } 323 324 // ServiceRegistrations holds the status of services registered for a particular 325 // task or task group. 326 type ServiceRegistrations struct { 327 Services map[string]*ServiceRegistration 328 } 329 330 func (t *ServiceRegistrations) copy() *ServiceRegistrations { 331 c := &ServiceRegistrations{ 332 Services: make(map[string]*ServiceRegistration, len(t.Services)), 333 } 334 335 for k, v := range t.Services { 336 c.Services[k] = v.copy() 337 } 338 339 return c 340 } 341 342 // ServiceRegistration holds the status of a registered Consul Service and its 343 // Checks. 344 type ServiceRegistration struct { 345 // serviceID and checkIDs are internal fields that track just the IDs of the 346 // services/checks registered in Consul. It is used to materialize the other 347 // fields when queried. 
348 serviceID string 349 checkIDs map[string]struct{} 350 351 // CheckOnUpdate is a map of checkIDs and the associated OnUpdate value 352 // from the ServiceCheck It is used to determine how a reported checks 353 // status should be evaluated. 354 CheckOnUpdate map[string]string 355 356 // Service is the AgentService registered in Consul. 357 Service *api.AgentService 358 359 // Checks is the status of the registered checks. 360 Checks []*api.AgentCheck 361 } 362 363 func (s *ServiceRegistration) copy() *ServiceRegistration { 364 // Copy does not copy the external fields but only the internal fields. This 365 // is so that the caller of AllocRegistrations can not access the internal 366 // fields and that method uses these fields to populate the external fields. 367 return &ServiceRegistration{ 368 serviceID: s.serviceID, 369 checkIDs: helper.CopyMapStringStruct(s.checkIDs), 370 CheckOnUpdate: helper.CopyMapStringString(s.CheckOnUpdate), 371 } 372 } 373 374 // ServiceClient handles task and agent service registration with Consul. 375 type ServiceClient struct { 376 client AgentAPI 377 logger log.Logger 378 retryInterval time.Duration 379 maxRetryInterval time.Duration 380 periodicInterval time.Duration 381 382 // exitCh is closed when the main Run loop exits 383 exitCh chan struct{} 384 385 // shutdownCh is closed when the client should shutdown 386 shutdownCh chan struct{} 387 388 // shutdownWait is how long Shutdown() blocks waiting for the final 389 // sync() to finish. Defaults to defaultShutdownWait 390 shutdownWait time.Duration 391 392 opCh chan *operations 393 394 services map[string]*api.AgentServiceRegistration 395 checks map[string]*api.AgentCheckRegistration 396 397 explicitlyDeregisteredServices map[string]bool 398 explicitlyDeregisteredChecks map[string]bool 399 400 // allocRegistrations stores the services and checks that are registered 401 // with Consul by allocation ID. 
402 allocRegistrations map[string]*AllocRegistration 403 allocRegistrationsLock sync.RWMutex 404 405 // agent services and checks record entries for the agent itself which 406 // should be removed on shutdown 407 agentServices map[string]struct{} 408 agentChecks map[string]struct{} 409 agentLock sync.Mutex 410 411 // seen is 1 if Consul has ever been seen; otherwise 0. Accessed with 412 // atomics. 413 seen int32 414 415 // deregisterProbationExpiry is the time before which consul sync shouldn't deregister 416 // unknown services. 417 // Used to mitigate risk of deleting restored services upon client restart. 418 deregisterProbationExpiry time.Time 419 420 // checkWatcher restarts checks that are unhealthy. 421 checkWatcher *checkWatcher 422 423 // isClientAgent specifies whether this Consul client is being used 424 // by a Nomad client. 425 isClientAgent bool 426 } 427 428 // NewServiceClient creates a new Consul ServiceClient from an existing Consul API 429 // Client, logger and takes whether the client is being used by a Nomad Client agent. 430 // When being used by a Nomad client, this Consul client reconciles all services and 431 // checks created by Nomad on behalf of running tasks. 
432 func NewServiceClient(consulClient AgentAPI, logger log.Logger, isNomadClient bool) *ServiceClient { 433 logger = logger.ResetNamed("consul.sync") 434 return &ServiceClient{ 435 client: consulClient, 436 logger: logger, 437 retryInterval: defaultRetryInterval, 438 maxRetryInterval: defaultMaxRetryInterval, 439 periodicInterval: defaultPeriodicInterval, 440 exitCh: make(chan struct{}), 441 shutdownCh: make(chan struct{}), 442 shutdownWait: defaultShutdownWait, 443 opCh: make(chan *operations, 8), 444 services: make(map[string]*api.AgentServiceRegistration), 445 checks: make(map[string]*api.AgentCheckRegistration), 446 explicitlyDeregisteredServices: make(map[string]bool), 447 explicitlyDeregisteredChecks: make(map[string]bool), 448 allocRegistrations: make(map[string]*AllocRegistration), 449 agentServices: make(map[string]struct{}), 450 agentChecks: make(map[string]struct{}), 451 checkWatcher: newCheckWatcher(logger, consulClient), 452 isClientAgent: isNomadClient, 453 deregisterProbationExpiry: time.Now().Add(deregisterProbationPeriod), 454 } 455 } 456 457 // seen is used by markSeen and hasSeen 458 const seen = 1 459 460 // markSeen marks Consul as having been seen (meaning at least one operation 461 // has succeeded). 462 func (c *ServiceClient) markSeen() { 463 atomic.StoreInt32(&c.seen, seen) 464 } 465 466 // hasSeen returns true if any Consul operation has ever succeeded. Useful to 467 // squelch errors if Consul isn't running. 468 func (c *ServiceClient) hasSeen() bool { 469 return atomic.LoadInt32(&c.seen) == seen 470 } 471 472 // syncReason indicates why a sync operation with consul is about to happen. 473 // 474 // The trigger for a sync may have implications on the behavior of the sync itself. 475 // In particular if a service is defined with enable_tag_override=true, the sync 476 // should ignore changes to the service's Tags field. 
477 type syncReason byte 478 479 const ( 480 syncPeriodic = iota 481 syncShutdown 482 syncNewOps 483 ) 484 485 // Run the Consul main loop which retries operations against Consul. It should 486 // be called exactly once. 487 func (c *ServiceClient) Run() { 488 defer close(c.exitCh) 489 490 ctx, cancel := context.WithCancel(context.Background()) 491 defer cancel() 492 493 // init will be closed when Consul has been contacted 494 init := make(chan struct{}) 495 go checkConsulTLSSkipVerify(ctx, c.logger, c.client, init) 496 497 // Process operations while waiting for initial contact with Consul but 498 // do not sync until contact has been made. 499 INIT: 500 for { 501 select { 502 case <-init: 503 c.markSeen() 504 break INIT 505 case <-c.shutdownCh: 506 return 507 case ops := <-c.opCh: 508 c.merge(ops) 509 } 510 } 511 c.logger.Trace("able to contact Consul") 512 513 // Block until contact with Consul has been established 514 // Start checkWatcher 515 go c.checkWatcher.Run(ctx) 516 517 // Always immediately sync to reconcile Nomad and Consul's state 518 retryTimer := time.NewTimer(0) 519 520 failures := 0 521 for { 522 // On every iteration take note of what the trigger for the next sync 523 // was, so that it may be referenced during the sync itself. 
524 var reasonForSync syncReason 525 526 select { 527 case <-retryTimer.C: 528 reasonForSync = syncPeriodic 529 case <-c.shutdownCh: 530 reasonForSync = syncShutdown 531 // Cancel check watcher but sync one last time 532 cancel() 533 case ops := <-c.opCh: 534 reasonForSync = syncNewOps 535 c.merge(ops) 536 } 537 538 if err := c.sync(reasonForSync); err != nil { 539 if failures == 0 { 540 // Log on the first failure 541 c.logger.Warn("failed to update services in Consul", "error", err) 542 } else if failures%10 == 0 { 543 // Log every 10th consecutive failure 544 c.logger.Error("still unable to update services in Consul", "failures", failures, "error", err) 545 } 546 547 failures++ 548 if !retryTimer.Stop() { 549 // Timer already expired, since the timer may 550 // or may not have been read in the select{} 551 // above, conditionally receive on it 552 select { 553 case <-retryTimer.C: 554 default: 555 } 556 } 557 backoff := c.retryInterval * time.Duration(failures) 558 if backoff > c.maxRetryInterval { 559 backoff = c.maxRetryInterval 560 } 561 retryTimer.Reset(backoff) 562 } else { 563 if failures > 0 { 564 c.logger.Info("successfully updated services in Consul") 565 failures = 0 566 } 567 568 // on successful sync, clear deregistered consul entities 569 c.clearExplicitlyDeregistered() 570 571 // Reset timer to periodic interval to periodically 572 // reconile with Consul 573 if !retryTimer.Stop() { 574 select { 575 case <-retryTimer.C: 576 default: 577 } 578 } 579 retryTimer.Reset(c.periodicInterval) 580 } 581 582 select { 583 case <-c.shutdownCh: 584 // Exit only after sync'ing all outstanding operations 585 if len(c.opCh) > 0 { 586 for len(c.opCh) > 0 { 587 c.merge(<-c.opCh) 588 } 589 continue 590 } 591 return 592 default: 593 } 594 595 } 596 } 597 598 // commit operations unless already shutting down. 
func (c *ServiceClient) commit(ops *operations) {
	// Blocks until either the Run loop has room for ops on opCh or
	// shutdownCh is closed, in which case ops is dropped.
	select {
	case c.opCh <- ops:
	case <-c.shutdownCh:
	}
}

// clearExplicitlyDeregistered resets the record of explicitly deregistered
// service and check IDs to empty maps.
func (c *ServiceClient) clearExplicitlyDeregistered() {
	c.explicitlyDeregisteredServices = map[string]bool{}
	c.explicitlyDeregisteredChecks = map[string]bool{}
}

// merge registrations into state map prior to sync'ing with Consul.
//
// Registrations are stored by ID in c.services / c.checks; deregistrations
// are removed from those maps and remembered as explicitly deregistered. The
// resulting map sizes are published as gauges.
func (c *ServiceClient) merge(ops *operations) {
	for _, s := range ops.regServices {
		c.services[s.ID] = s
	}
	for _, check := range ops.regChecks {
		c.checks[check.ID] = check
	}
	for _, sid := range ops.deregServices {
		delete(c.services, sid)
		c.explicitlyDeregisteredServices[sid] = true
	}
	for _, cid := range ops.deregChecks {
		delete(c.checks, cid)
		c.explicitlyDeregisteredChecks[cid] = true
	}
	metrics.SetGauge([]string{"client", "consul", "services"}, float32(len(c.services)))
	metrics.SetGauge([]string{"client", "consul", "checks"}, float32(len(c.checks)))
}

// sync enqueued operations.
//
// sync reconciles the desired state in c.services / c.checks with what the
// Consul agent reports, in four passes: deregister unknown Nomad services,
// register missing or changed services, deregister unknown Nomad checks, and
// register missing checks. reason is forwarded to agentServiceUpdateRequired.
// The first failing Consul call aborts the sync and its error is returned
// (after incrementing the sync_failure counter); passes already performed are
// not rolled back.
func (c *ServiceClient) sync(reason syncReason) error {
	// Counters for the summary log line at the end.
	sreg, creg, sdereg, cdereg := 0, 0, 0, 0

	consulServices, err := c.client.Services()
	if err != nil {
		metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
		return fmt.Errorf("error querying Consul services: %v", err)
	}

	inProbation := time.Now().Before(c.deregisterProbationExpiry)

	// Remove Nomad services in Consul but unknown locally
	for id := range consulServices {
		if _, ok := c.services[id]; ok {
			// Known service, skip
			continue
		}

		// Ignore if this is not a Nomad managed service. Also ignore
		// Nomad managed services if this is not a client agent.
		// This is to prevent server agents from removing services
		// registered by client agents
		if !isNomadService(id) || !c.isClientAgent {
			// Not managed by Nomad, skip
			continue
		}

		// Ignore unknown services during probation
		if inProbation && !c.explicitlyDeregisteredServices[id] {
			continue
		}

		// Ignore if this is a service for a Nomad managed sidecar proxy.
		if isNomadSidecar(id, c.services) {
			continue
		}

		// Unknown Nomad managed service; kill
		if err := c.client.ServiceDeregister(id); err != nil {
			if isOldNomadService(id) {
				// Don't hard-fail on old entries. See #3620
				continue
			}

			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		sdereg++
		metrics.IncrCounter([]string{"client", "consul", "service_deregistrations"}, 1)
	}

	// Add Nomad services missing from Consul, or where the service has been updated.
	for id, serviceInNomad := range c.services {

		serviceInConsul, exists := consulServices[id]
		sidecarInConsul := getNomadSidecar(id, consulServices)

		// The !exists check short-circuits, so agentServiceUpdateRequired is
		// only called with a service that was found in consulServices.
		if !exists || agentServiceUpdateRequired(reason, serviceInNomad, serviceInConsul, sidecarInConsul) {
			if err = c.client.ServiceRegister(serviceInNomad); err != nil {
				metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
				return err
			}
			sreg++
			metrics.IncrCounter([]string{"client", "consul", "service_registrations"}, 1)
		}

	}

	consulChecks, err := c.client.Checks()
	if err != nil {
		metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
		return fmt.Errorf("error querying Consul checks: %v", err)
	}

	// Remove Nomad checks in Consul but unknown locally
	for id, check := range consulChecks {
		if _, ok := c.checks[id]; ok {
			// Known check, leave it
			continue
		}

		// Ignore if this is not a Nomad managed check. Also ignore
		// Nomad managed checks if this is not a client agent.
		// This is to prevent server agents from removing checks
		// registered by client agents
		if !isNomadService(check.ServiceID) || !c.isClientAgent || !isNomadCheck(check.CheckID) {
			// Service not managed by Nomad, skip
			continue
		}

		// Ignore unknown checks during probation
		if inProbation && !c.explicitlyDeregisteredChecks[id] {
			continue
		}

		// Ignore if this is a check for a Nomad managed sidecar proxy.
		if isNomadSidecar(check.ServiceID, c.services) {
			continue
		}

		// Unknown Nomad managed check; remove
		if err := c.client.CheckDeregister(id); err != nil {
			if isOldNomadService(check.ServiceID) {
				// Don't hard-fail on old entries.
				continue
			}

			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		cdereg++
		metrics.IncrCounter([]string{"client", "consul", "check_deregistrations"}, 1)
	}

	// Add Nomad checks missing from Consul
	for id, check := range c.checks {
		if _, ok := consulChecks[id]; ok {
			// Already in Consul; skipping
			continue
		}

		if err := c.client.CheckRegister(check); err != nil {
			metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
			return err
		}
		creg++
		metrics.IncrCounter([]string{"client", "consul", "check_registrations"}, 1)
	}

	// Only log if something was actually synced
	if sreg > 0 || sdereg > 0 || creg > 0 || cdereg > 0 {
		c.logger.Debug("sync complete", "registered_services", sreg, "deregistered_services", sdereg,
			"registered_checks", creg, "deregistered_checks", cdereg)
	}
	return nil
}

// RegisterAgent registers Nomad agents (client or server). The
// Service.PortLabel should be a literal port to be parsed with SplitHostPort.
// Script checks are not supported and will return an error. Registration is
// asynchronous.
773 // 774 // Agents will be deregistered when Shutdown is called. 775 func (c *ServiceClient) RegisterAgent(role string, services []*structs.Service) error { 776 ops := operations{} 777 778 for _, service := range services { 779 id := makeAgentServiceID(role, service) 780 781 // Unlike tasks, agents don't use port labels. Agent ports are 782 // stored directly in the PortLabel. 783 host, rawport, err := net.SplitHostPort(service.PortLabel) 784 if err != nil { 785 return fmt.Errorf("error parsing port label %q from service %q: %v", service.PortLabel, service.Name, err) 786 } 787 port, err := strconv.Atoi(rawport) 788 if err != nil { 789 return fmt.Errorf("error parsing port %q from service %q: %v", rawport, service.Name, err) 790 } 791 serviceReg := &api.AgentServiceRegistration{ 792 ID: id, 793 Name: service.Name, 794 Tags: service.Tags, 795 Address: host, 796 Port: port, 797 // This enables the consul UI to show that Nomad registered this service 798 Meta: map[string]string{ 799 "external-source": "nomad", 800 }, 801 } 802 ops.regServices = append(ops.regServices, serviceReg) 803 804 for _, check := range service.Checks { 805 checkID := MakeCheckID(id, check) 806 if check.Type == structs.ServiceCheckScript { 807 return fmt.Errorf("service %q contains invalid check: agent checks do not support scripts", service.Name) 808 } 809 checkHost, checkPort := serviceReg.Address, serviceReg.Port 810 if check.PortLabel != "" { 811 // Unlike tasks, agents don't use port labels. Agent ports are 812 // stored directly in the PortLabel. 
813 host, rawport, err := net.SplitHostPort(check.PortLabel) 814 if err != nil { 815 return fmt.Errorf("error parsing port label %q from check %q: %v", service.PortLabel, check.Name, err) 816 } 817 port, err := strconv.Atoi(rawport) 818 if err != nil { 819 return fmt.Errorf("error parsing port %q from check %q: %v", rawport, check.Name, err) 820 } 821 checkHost, checkPort = host, port 822 } 823 checkReg, err := createCheckReg(id, checkID, check, checkHost, checkPort) 824 if err != nil { 825 return fmt.Errorf("failed to add check %q: %v", check.Name, err) 826 } 827 ops.regChecks = append(ops.regChecks, checkReg) 828 } 829 } 830 831 // Don't bother committing agent checks if we're already shutting down 832 c.agentLock.Lock() 833 defer c.agentLock.Unlock() 834 select { 835 case <-c.shutdownCh: 836 return nil 837 default: 838 } 839 840 // Now add them to the registration queue 841 c.commit(&ops) 842 843 // Record IDs for deregistering on shutdown 844 for _, id := range ops.regServices { 845 c.agentServices[id.ID] = struct{}{} 846 } 847 for _, id := range ops.regChecks { 848 c.agentChecks[id.ID] = struct{}{} 849 } 850 return nil 851 } 852 853 // serviceRegs creates service registrations, check registrations, and script 854 // checks from a service. It returns a service registration object with the 855 // service and check IDs populated. 
856 func (c *ServiceClient) serviceRegs(ops *operations, service *structs.Service, workload *WorkloadServices) ( 857 *ServiceRegistration, error) { 858 859 // Get the services ID 860 id := MakeAllocServiceID(workload.AllocID, workload.Name(), service) 861 sreg := &ServiceRegistration{ 862 serviceID: id, 863 checkIDs: make(map[string]struct{}, len(service.Checks)), 864 CheckOnUpdate: make(map[string]string, len(service.Checks)), 865 } 866 867 // Service address modes default to auto 868 addrMode := service.AddressMode 869 if addrMode == "" { 870 addrMode = structs.AddressModeAuto 871 } 872 873 // Determine the address to advertise based on the mode 874 ip, port, err := getAddress(addrMode, service.PortLabel, workload.Networks, workload.DriverNetwork, workload.Ports, workload.NetworkStatus) 875 if err != nil { 876 return nil, fmt.Errorf("unable to get address for service %q: %v", service.Name, err) 877 } 878 879 // Determine whether to use tags or canary_tags 880 var tags []string 881 if workload.Canary && len(service.CanaryTags) > 0 { 882 tags = make([]string, len(service.CanaryTags)) 883 copy(tags, service.CanaryTags) 884 } else { 885 tags = make([]string, len(service.Tags)) 886 copy(tags, service.Tags) 887 } 888 889 // newConnect returns (nil, nil) if there's no Connect-enabled service. 890 connect, err := newConnect(id, service.Name, service.Connect, workload.Networks, workload.Ports) 891 if err != nil { 892 return nil, fmt.Errorf("invalid Consul Connect configuration for service %q: %v", service.Name, err) 893 } 894 895 // newConnectGateway returns nil if there's no Connect gateway. 
896 gateway := newConnectGateway(service.Name, service.Connect) 897 898 // Determine whether to use meta or canary_meta 899 var meta map[string]string 900 if workload.Canary && len(service.CanaryMeta) > 0 { 901 meta = make(map[string]string, len(service.CanaryMeta)+1) 902 for k, v := range service.CanaryMeta { 903 meta[k] = v 904 } 905 } else { 906 meta = make(map[string]string, len(service.Meta)+1) 907 for k, v := range service.Meta { 908 meta[k] = v 909 } 910 } 911 912 // This enables the consul UI to show that Nomad registered this service 913 meta["external-source"] = "nomad" 914 915 // Explicitly set the Consul service Kind in case this service represents 916 // one of the Connect gateway types. 917 kind := api.ServiceKindTypical 918 switch { 919 case service.Connect.IsIngress(): 920 kind = api.ServiceKindIngressGateway 921 case service.Connect.IsTerminating(): 922 kind = api.ServiceKindTerminatingGateway 923 // set the default port if bridge / default listener set 924 if defaultBind, exists := service.Connect.Gateway.Proxy.EnvoyGatewayBindAddresses["default"]; exists { 925 portLabel := fmt.Sprintf("%s-%s", structs.ConnectTerminatingPrefix, service.Name) 926 if dynPort, ok := workload.Ports.Get(portLabel); ok { 927 defaultBind.Port = dynPort.Value 928 } 929 } 930 } 931 932 // Build the Consul Service registration request 933 serviceReg := &api.AgentServiceRegistration{ 934 Kind: kind, 935 ID: id, 936 Name: service.Name, 937 Tags: tags, 938 EnableTagOverride: service.EnableTagOverride, 939 Address: ip, 940 Port: port, 941 Meta: meta, 942 Connect: connect, // will be nil if no Connect stanza 943 Proxy: gateway, // will be nil if no Connect Gateway stanza 944 } 945 ops.regServices = append(ops.regServices, serviceReg) 946 947 // Build the check registrations 948 checkRegs, err := c.checkRegs(id, service, workload, sreg) 949 if err != nil { 950 return nil, err 951 } 952 for _, registration := range checkRegs { 953 sreg.checkIDs[registration.ID] = struct{}{} 954 
ops.regChecks = append(ops.regChecks, registration) 955 } 956 957 return sreg, nil 958 } 959 960 // checkRegs creates check registrations for the given service 961 func (c *ServiceClient) checkRegs(serviceID string, service *structs.Service, 962 workload *WorkloadServices, sreg *ServiceRegistration) ([]*api.AgentCheckRegistration, error) { 963 964 registrations := make([]*api.AgentCheckRegistration, 0, len(service.Checks)) 965 for _, check := range service.Checks { 966 var ip string 967 var port int 968 969 if check.Type != structs.ServiceCheckScript { 970 portLabel := check.PortLabel 971 if portLabel == "" { 972 portLabel = service.PortLabel 973 } 974 975 addrMode := check.AddressMode 976 if addrMode == "" { 977 // pre-#3380 compat 978 addrMode = structs.AddressModeHost 979 } 980 981 var err error 982 ip, port, err = getAddress(addrMode, portLabel, workload.Networks, workload.DriverNetwork, workload.Ports, workload.NetworkStatus) 983 if err != nil { 984 return nil, fmt.Errorf("error getting address for check %q: %v", check.Name, err) 985 } 986 } 987 988 checkID := MakeCheckID(serviceID, check) 989 registration, err := createCheckReg(serviceID, checkID, check, ip, port) 990 if err != nil { 991 return nil, fmt.Errorf("failed to add check %q: %v", check.Name, err) 992 } 993 sreg.CheckOnUpdate[checkID] = check.OnUpdate 994 995 registrations = append(registrations, registration) 996 } 997 998 return registrations, nil 999 } 1000 1001 // RegisterWorkload with Consul. Adds all service entries and checks to Consul. 1002 // 1003 // If the service IP is set it used as the address in the service registration. 1004 // Checks will always use the IP from the Task struct (host's IP). 1005 // 1006 // Actual communication with Consul is done asynchronously (see Run). 
1007 func (c *ServiceClient) RegisterWorkload(workload *WorkloadServices) error { 1008 // Fast path 1009 numServices := len(workload.Services) 1010 if numServices == 0 { 1011 return nil 1012 } 1013 1014 t := new(ServiceRegistrations) 1015 t.Services = make(map[string]*ServiceRegistration, numServices) 1016 1017 ops := &operations{} 1018 for _, service := range workload.Services { 1019 sreg, err := c.serviceRegs(ops, service, workload) 1020 if err != nil { 1021 return err 1022 } 1023 t.Services[sreg.serviceID] = sreg 1024 } 1025 1026 // Add the workload to the allocation's registration 1027 c.addRegistrations(workload.AllocID, workload.Name(), t) 1028 1029 c.commit(ops) 1030 1031 // Start watching checks. Done after service registrations are built 1032 // since an error building them could leak watches. 1033 for _, service := range workload.Services { 1034 serviceID := MakeAllocServiceID(workload.AllocID, workload.Name(), service) 1035 for _, check := range service.Checks { 1036 if check.TriggersRestarts() { 1037 checkID := MakeCheckID(serviceID, check) 1038 c.checkWatcher.Watch(workload.AllocID, workload.Name(), checkID, check, workload.Restarter) 1039 } 1040 } 1041 } 1042 return nil 1043 } 1044 1045 // UpdateWorkload in Consul. Does not alter the service if only checks have 1046 // changed. 1047 // 1048 // DriverNetwork must not change between invocations for the same allocation. 
1049 func (c *ServiceClient) UpdateWorkload(old, newWorkload *WorkloadServices) error { 1050 ops := new(operations) 1051 regs := new(ServiceRegistrations) 1052 regs.Services = make(map[string]*ServiceRegistration, len(newWorkload.Services)) 1053 1054 newIDs := make(map[string]*structs.Service, len(newWorkload.Services)) 1055 for _, s := range newWorkload.Services { 1056 newIDs[MakeAllocServiceID(newWorkload.AllocID, newWorkload.Name(), s)] = s 1057 } 1058 1059 // Loop over existing Services to see if they have been removed 1060 for _, existingSvc := range old.Services { 1061 existingID := MakeAllocServiceID(old.AllocID, old.Name(), existingSvc) 1062 newSvc, ok := newIDs[existingID] 1063 1064 if !ok { 1065 // Existing service entry removed 1066 ops.deregServices = append(ops.deregServices, existingID) 1067 for _, check := range existingSvc.Checks { 1068 cid := MakeCheckID(existingID, check) 1069 ops.deregChecks = append(ops.deregChecks, cid) 1070 1071 // Unwatch watched checks 1072 if check.TriggersRestarts() { 1073 c.checkWatcher.Unwatch(cid) 1074 } 1075 } 1076 continue 1077 } 1078 1079 oldHash := existingSvc.Hash(old.AllocID, old.Name(), old.Canary) 1080 newHash := newSvc.Hash(newWorkload.AllocID, newWorkload.Name(), newWorkload.Canary) 1081 if oldHash == newHash { 1082 // Service exists and hasn't changed, don't re-add it later 1083 delete(newIDs, existingID) 1084 } 1085 1086 // Service still exists so add it to the task's registration 1087 sreg := &ServiceRegistration{ 1088 serviceID: existingID, 1089 checkIDs: make(map[string]struct{}, len(newSvc.Checks)), 1090 CheckOnUpdate: make(map[string]string, len(newSvc.Checks)), 1091 } 1092 regs.Services[existingID] = sreg 1093 1094 // See if any checks were updated 1095 existingChecks := make(map[string]*structs.ServiceCheck, len(existingSvc.Checks)) 1096 for _, check := range existingSvc.Checks { 1097 existingChecks[MakeCheckID(existingID, check)] = check 1098 } 1099 1100 // Register new checks 1101 for _, check := 
range newSvc.Checks { 1102 checkID := MakeCheckID(existingID, check) 1103 if _, exists := existingChecks[checkID]; exists { 1104 // Check is still required. Remove it from the map so it doesn't get 1105 // deleted later. 1106 delete(existingChecks, checkID) 1107 sreg.checkIDs[checkID] = struct{}{} 1108 sreg.CheckOnUpdate[checkID] = check.OnUpdate 1109 } 1110 1111 // New check on an unchanged service; add them now 1112 checkRegs, err := c.checkRegs(existingID, newSvc, newWorkload, sreg) 1113 if err != nil { 1114 return err 1115 } 1116 1117 for _, registration := range checkRegs { 1118 sreg.checkIDs[registration.ID] = struct{}{} 1119 sreg.CheckOnUpdate[registration.ID] = check.OnUpdate 1120 ops.regChecks = append(ops.regChecks, registration) 1121 } 1122 1123 // Update all watched checks as CheckRestart fields aren't part of ID 1124 if check.TriggersRestarts() { 1125 c.checkWatcher.Watch(newWorkload.AllocID, newWorkload.Name(), checkID, check, newWorkload.Restarter) 1126 } 1127 } 1128 1129 // Remove existing checks not in updated service 1130 for cid, check := range existingChecks { 1131 ops.deregChecks = append(ops.deregChecks, cid) 1132 1133 // Unwatch checks 1134 if check.TriggersRestarts() { 1135 c.checkWatcher.Unwatch(cid) 1136 } 1137 } 1138 } 1139 1140 // Any remaining services should just be enqueued directly 1141 for _, newSvc := range newIDs { 1142 sreg, err := c.serviceRegs(ops, newSvc, newWorkload) 1143 if err != nil { 1144 return err 1145 } 1146 1147 regs.Services[sreg.serviceID] = sreg 1148 } 1149 1150 // Add the task to the allocation's registration 1151 c.addRegistrations(newWorkload.AllocID, newWorkload.Name(), regs) 1152 1153 c.commit(ops) 1154 1155 // Start watching checks. Done after service registrations are built 1156 // since an error building them could leak watches. 
1157 for serviceID, service := range newIDs { 1158 for _, check := range service.Checks { 1159 if check.TriggersRestarts() { 1160 checkID := MakeCheckID(serviceID, check) 1161 c.checkWatcher.Watch(newWorkload.AllocID, newWorkload.Name(), checkID, check, newWorkload.Restarter) 1162 } 1163 } 1164 } 1165 1166 return nil 1167 } 1168 1169 // RemoveWorkload from Consul. Removes all service entries and checks. 1170 // 1171 // Actual communication with Consul is done asynchronously (see Run). 1172 func (c *ServiceClient) RemoveWorkload(workload *WorkloadServices) { 1173 ops := operations{} 1174 1175 for _, service := range workload.Services { 1176 id := MakeAllocServiceID(workload.AllocID, workload.Name(), service) 1177 ops.deregServices = append(ops.deregServices, id) 1178 1179 for _, check := range service.Checks { 1180 cid := MakeCheckID(id, check) 1181 ops.deregChecks = append(ops.deregChecks, cid) 1182 1183 if check.TriggersRestarts() { 1184 c.checkWatcher.Unwatch(cid) 1185 } 1186 } 1187 } 1188 1189 // Remove the workload from the alloc's registrations 1190 c.removeRegistration(workload.AllocID, workload.Name()) 1191 1192 // Now add them to the deregistration fields; main Run loop will update 1193 c.commit(&ops) 1194 } 1195 1196 // AllocRegistrations returns the registrations for the given allocation. If the 1197 // allocation has no reservations, the response is a nil object. 1198 func (c *ServiceClient) AllocRegistrations(allocID string) (*AllocRegistration, error) { 1199 // Get the internal struct using the lock 1200 c.allocRegistrationsLock.RLock() 1201 regInternal, ok := c.allocRegistrations[allocID] 1202 if !ok { 1203 c.allocRegistrationsLock.RUnlock() 1204 return nil, nil 1205 } 1206 1207 // Copy so we don't expose internal structs 1208 reg := regInternal.copy() 1209 c.allocRegistrationsLock.RUnlock() 1210 1211 // Query the services and checks to populate the allocation registrations. 
1212 services, err := c.client.Services() 1213 if err != nil { 1214 return nil, err 1215 } 1216 1217 checks, err := c.client.Checks() 1218 if err != nil { 1219 return nil, err 1220 } 1221 1222 // Populate the object 1223 for _, treg := range reg.Tasks { 1224 for serviceID, sreg := range treg.Services { 1225 sreg.Service = services[serviceID] 1226 for checkID := range sreg.checkIDs { 1227 if check, ok := checks[checkID]; ok { 1228 sreg.Checks = append(sreg.Checks, check) 1229 } 1230 } 1231 } 1232 } 1233 1234 return reg, nil 1235 } 1236 1237 // UpdateTTL is used to update the TTL of a check. Typically this will only be 1238 // called to heartbeat script checks. 1239 func (c *ServiceClient) UpdateTTL(id, output, status string) error { 1240 return c.client.UpdateTTL(id, output, status) 1241 } 1242 1243 // Shutdown the Consul client. Update running task registrations and deregister 1244 // agent from Consul. On first call blocks up to shutdownWait before giving up 1245 // on syncing operations. 1246 func (c *ServiceClient) Shutdown() error { 1247 // Serialize Shutdown calls with RegisterAgent to prevent leaking agent 1248 // entries. 
1249 c.agentLock.Lock() 1250 defer c.agentLock.Unlock() 1251 select { 1252 case <-c.shutdownCh: 1253 return nil 1254 default: 1255 close(c.shutdownCh) 1256 } 1257 1258 // Give run loop time to sync, but don't block indefinitely 1259 deadline := time.After(c.shutdownWait) 1260 1261 // Wait for Run to finish any outstanding operations and exit 1262 select { 1263 case <-c.exitCh: 1264 case <-deadline: 1265 // Don't wait forever though 1266 } 1267 1268 // If Consul was never seen nothing could be written so exit early 1269 if !c.hasSeen() { 1270 return nil 1271 } 1272 1273 // Always attempt to deregister Nomad agent Consul entries, even if 1274 // deadline was reached 1275 for id := range c.agentServices { 1276 if err := c.client.ServiceDeregister(id); err != nil { 1277 c.logger.Error("failed deregistering agent service", "service_id", id, "error", err) 1278 } 1279 } 1280 1281 remainingChecks, err := c.client.Checks() 1282 if err != nil { 1283 c.logger.Error("failed listing remaining checks after deregistering services", "error", err) 1284 } 1285 1286 checkRemains := func(id string) bool { 1287 for _, c := range remainingChecks { 1288 if c.CheckID == id { 1289 return true 1290 } 1291 } 1292 return false 1293 } 1294 1295 for id := range c.agentChecks { 1296 // if we couldn't populate remainingChecks it is unlikely that CheckDeregister will work, but try anyway 1297 // if we could list the remaining checks, verify that the check we store still exists before removing it. 1298 if remainingChecks == nil || checkRemains(id) { 1299 if err := c.client.CheckDeregister(id); err != nil { 1300 c.logger.Error("failed deregistering agent check", "check_id", id, "error", err) 1301 } 1302 } 1303 } 1304 1305 return nil 1306 } 1307 1308 // addRegistration adds the service registrations for the given allocation. 
1309 func (c *ServiceClient) addRegistrations(allocID, taskName string, reg *ServiceRegistrations) { 1310 c.allocRegistrationsLock.Lock() 1311 defer c.allocRegistrationsLock.Unlock() 1312 1313 alloc, ok := c.allocRegistrations[allocID] 1314 if !ok { 1315 alloc = &AllocRegistration{ 1316 Tasks: make(map[string]*ServiceRegistrations), 1317 } 1318 c.allocRegistrations[allocID] = alloc 1319 } 1320 alloc.Tasks[taskName] = reg 1321 } 1322 1323 // removeRegistrations removes the registration for the given allocation. 1324 func (c *ServiceClient) removeRegistration(allocID, taskName string) { 1325 c.allocRegistrationsLock.Lock() 1326 defer c.allocRegistrationsLock.Unlock() 1327 1328 alloc, ok := c.allocRegistrations[allocID] 1329 if !ok { 1330 return 1331 } 1332 1333 // Delete the task and if it is the last one also delete the alloc's 1334 // registration 1335 delete(alloc.Tasks, taskName) 1336 if len(alloc.Tasks) == 0 { 1337 delete(c.allocRegistrations, allocID) 1338 } 1339 } 1340 1341 // makeAgentServiceID creates a unique ID for identifying an agent service in 1342 // Consul. 1343 // 1344 // Agent service IDs are of the form: 1345 // 1346 // {nomadServicePrefix}-{ROLE}-b32(sha1({Service.Name}-{Service.Tags...}) 1347 // Example Server ID: _nomad-server-fbbk265qn4tmt25nd4ep42tjvmyj3hr4 1348 // Example Client ID: _nomad-client-ggnjpgl7yn7rgmvxzilmpvrzzvrszc7l 1349 // 1350 func makeAgentServiceID(role string, service *structs.Service) string { 1351 return fmt.Sprintf("%s-%s-%s", nomadServicePrefix, role, service.Hash(role, "", false)) 1352 } 1353 1354 // MakeAllocServiceID creates a unique ID for identifying an alloc service in 1355 // Consul. 
1356 // 1357 // Example Service ID: _nomad-task-b4e61df9-b095-d64e-f241-23860da1375f-redis-http-http 1358 func MakeAllocServiceID(allocID, taskName string, service *structs.Service) string { 1359 return fmt.Sprintf("%s%s-%s-%s-%s", nomadTaskPrefix, allocID, taskName, service.Name, service.PortLabel) 1360 } 1361 1362 // MakeCheckID creates a unique ID for a check. 1363 // 1364 // Example Check ID: _nomad-check-434ae42f9a57c5705344974ac38de2aee0ee089d 1365 func MakeCheckID(serviceID string, check *structs.ServiceCheck) string { 1366 return fmt.Sprintf("%s%s", nomadCheckPrefix, check.Hash(serviceID)) 1367 } 1368 1369 // createCheckReg creates a Check that can be registered with Consul. 1370 // 1371 // Script checks simply have a TTL set and the caller is responsible for 1372 // running the script and heart-beating. 1373 func createCheckReg(serviceID, checkID string, check *structs.ServiceCheck, host string, port int) (*api.AgentCheckRegistration, error) { 1374 chkReg := api.AgentCheckRegistration{ 1375 ID: checkID, 1376 Name: check.Name, 1377 ServiceID: serviceID, 1378 } 1379 chkReg.Status = check.InitialStatus 1380 chkReg.Timeout = check.Timeout.String() 1381 chkReg.Interval = check.Interval.String() 1382 chkReg.SuccessBeforePassing = check.SuccessBeforePassing 1383 chkReg.FailuresBeforeCritical = check.FailuresBeforeCritical 1384 1385 // Require an address for http or tcp checks 1386 if port == 0 && check.RequiresPort() { 1387 return nil, fmt.Errorf("%s checks require an address", check.Type) 1388 } 1389 1390 switch check.Type { 1391 case structs.ServiceCheckHTTP: 1392 proto := check.Protocol 1393 if proto == "" { 1394 proto = "http" 1395 } 1396 if check.TLSSkipVerify { 1397 chkReg.TLSSkipVerify = true 1398 } 1399 base := url.URL{ 1400 Scheme: proto, 1401 Host: net.JoinHostPort(host, strconv.Itoa(port)), 1402 } 1403 relative, err := url.Parse(check.Path) 1404 if err != nil { 1405 return nil, err 1406 } 1407 checkURL := base.ResolveReference(relative) 1408 
chkReg.HTTP = checkURL.String() 1409 chkReg.Method = check.Method 1410 chkReg.Header = check.Header 1411 1412 case structs.ServiceCheckTCP: 1413 chkReg.TCP = net.JoinHostPort(host, strconv.Itoa(port)) 1414 1415 case structs.ServiceCheckScript: 1416 chkReg.TTL = (check.Interval + ttlCheckBuffer).String() 1417 // As of Consul 1.0.0 setting TTL and Interval is a 400 1418 chkReg.Interval = "" 1419 1420 case structs.ServiceCheckGRPC: 1421 chkReg.GRPC = fmt.Sprintf("%s/%s", net.JoinHostPort(host, strconv.Itoa(port)), check.GRPCService) 1422 chkReg.GRPCUseTLS = check.GRPCUseTLS 1423 if check.TLSSkipVerify { 1424 chkReg.TLSSkipVerify = true 1425 } 1426 1427 default: 1428 return nil, fmt.Errorf("check type %+q not valid", check.Type) 1429 } 1430 return &chkReg, nil 1431 } 1432 1433 // isNomadCheck returns true if the ID matches the pattern of a Nomad managed 1434 // check. 1435 func isNomadCheck(id string) bool { 1436 return strings.HasPrefix(id, nomadCheckPrefix) 1437 } 1438 1439 // isNomadService returns true if the ID matches the pattern of a Nomad managed 1440 // service (new or old formats). Agent services return false as independent 1441 // client and server agents may be running on the same machine. #2827 1442 func isNomadService(id string) bool { 1443 return strings.HasPrefix(id, nomadTaskPrefix) || isOldNomadService(id) 1444 } 1445 1446 // isOldNomadService returns true if the ID matches an old pattern managed by 1447 // Nomad. 
1448 // 1449 // Pre-0.7.1 task service IDs are of the form: 1450 // 1451 // {nomadServicePrefix}-executor-{ALLOC_ID}-{Service.Name}-{Service.Tags...} 1452 // Example Service ID: _nomad-executor-1234-echo-http-tag1-tag2-tag3 1453 // 1454 func isOldNomadService(id string) bool { 1455 const prefix = nomadServicePrefix + "-executor" 1456 return strings.HasPrefix(id, prefix) 1457 } 1458 1459 const ( 1460 sidecarSuffix = "-sidecar-proxy" 1461 ) 1462 1463 // isNomadSidecar returns true if the ID matches a sidecar proxy for a Nomad 1464 // managed service. 1465 // 1466 // For example if you have a Connect enabled service with the ID: 1467 // 1468 // _nomad-task-5229c7f8-376b-3ccc-edd9-981e238f7033-cache-redis-cache-db 1469 // 1470 // Consul will create a service for the sidecar proxy with the ID: 1471 // 1472 // _nomad-task-5229c7f8-376b-3ccc-edd9-981e238f7033-cache-redis-cache-db-sidecar-proxy 1473 // 1474 func isNomadSidecar(id string, services map[string]*api.AgentServiceRegistration) bool { 1475 if !strings.HasSuffix(id, sidecarSuffix) { 1476 return false 1477 } 1478 1479 // Make sure the Nomad managed service for this proxy still exists. 1480 _, ok := services[id[:len(id)-len(sidecarSuffix)]] 1481 return ok 1482 } 1483 1484 // getNomadSidecar returns the service registration of the sidecar for the managed 1485 // service with the specified id. 1486 // 1487 // If the managed service of the specified id does not exist, or the service does 1488 // not have a sidecar proxy, nil is returned. 1489 func getNomadSidecar(id string, services map[string]*api.AgentService) *api.AgentService { 1490 if _, exists := services[id]; !exists { 1491 return nil 1492 } 1493 1494 sidecarID := id + sidecarSuffix 1495 return services[sidecarID] 1496 } 1497 1498 // getAddress returns the IP and port to use for a service or check. If no port 1499 // label is specified (an empty value), zero values are returned because no 1500 // address could be resolved. 
1501 func getAddress(addrMode, portLabel string, networks structs.Networks, driverNet *drivers.DriverNetwork, ports structs.AllocatedPorts, netStatus *structs.AllocNetworkStatus) (string, int, error) { 1502 switch addrMode { 1503 case structs.AddressModeAuto: 1504 if driverNet.Advertise() { 1505 addrMode = structs.AddressModeDriver 1506 } else { 1507 addrMode = structs.AddressModeHost 1508 } 1509 return getAddress(addrMode, portLabel, networks, driverNet, ports, netStatus) 1510 case structs.AddressModeHost: 1511 if portLabel == "" { 1512 if len(networks) != 1 { 1513 // If no networks are specified return zero 1514 // values. Consul will advertise the host IP 1515 // with no port. This is the pre-0.7.1 behavior 1516 // some people rely on. 1517 return "", 0, nil 1518 } 1519 1520 return networks[0].IP, 0, nil 1521 } 1522 1523 // Default path: use host ip:port 1524 // Try finding port in the AllocatedPorts struct first 1525 // Check in Networks struct for backwards compatibility if not found 1526 mapping, ok := ports.Get(portLabel) 1527 if !ok { 1528 mapping = networks.Port(portLabel) 1529 if mapping.Value > 0 { 1530 return mapping.HostIP, mapping.Value, nil 1531 } 1532 1533 // If port isn't a label, try to parse it as a literal port number 1534 port, err := strconv.Atoi(portLabel) 1535 if err != nil { 1536 // Don't include Atoi error message as user likely 1537 // never intended it to be a numeric and it creates a 1538 // confusing error message 1539 return "", 0, fmt.Errorf("invalid port %q: port label not found", portLabel) 1540 } 1541 if port <= 0 { 1542 return "", 0, fmt.Errorf("invalid port: %q: port must be >0", portLabel) 1543 } 1544 1545 // A number was given which will use the Consul agent's address and the given port 1546 // Returning a blank string as an address will use the Consul agent's address 1547 return "", port, nil 1548 } 1549 return mapping.HostIP, mapping.Value, nil 1550 1551 case structs.AddressModeDriver: 1552 // Require a driver network if 
driver address mode is used 1553 if driverNet == nil { 1554 return "", 0, fmt.Errorf(`cannot use address_mode="driver": no driver network exists`) 1555 } 1556 1557 // If no port label is specified just return the IP 1558 if portLabel == "" { 1559 return driverNet.IP, 0, nil 1560 } 1561 1562 // If the port is a label, use the driver's port (not the host's) 1563 if port, ok := ports.Get(portLabel); ok { 1564 return driverNet.IP, port.To, nil 1565 } 1566 1567 // Check if old style driver portmap is used 1568 if port, ok := driverNet.PortMap[portLabel]; ok { 1569 return driverNet.IP, port, nil 1570 } 1571 1572 // If port isn't a label, try to parse it as a literal port number 1573 port, err := strconv.Atoi(portLabel) 1574 if err != nil { 1575 // Don't include Atoi error message as user likely 1576 // never intended it to be a numeric and it creates a 1577 // confusing error message 1578 return "", 0, fmt.Errorf("invalid port label %q: port labels in driver address_mode must be numeric or in the driver's port map", portLabel) 1579 } 1580 if port <= 0 { 1581 return "", 0, fmt.Errorf("invalid port: %q: port must be >0", portLabel) 1582 } 1583 1584 return driverNet.IP, port, nil 1585 1586 case structs.AddressModeAlloc: 1587 if netStatus == nil { 1588 return "", 0, fmt.Errorf(`cannot use address_mode="alloc": no allocation network status reported`) 1589 } 1590 1591 // If no port label is specified just return the IP 1592 if portLabel == "" { 1593 return netStatus.Address, 0, nil 1594 } 1595 1596 // If port is a label and is found then return it 1597 if port, ok := ports.Get(portLabel); ok { 1598 // Use port.To value unless not set 1599 if port.To > 0 { 1600 return netStatus.Address, port.To, nil 1601 } 1602 return netStatus.Address, port.Value, nil 1603 } 1604 1605 // Check if port is a literal number 1606 port, err := strconv.Atoi(portLabel) 1607 if err != nil { 1608 // User likely specified wrong port label here 1609 return "", 0, fmt.Errorf("invalid port %q: port label 
not found or is not numeric", portLabel) 1610 } 1611 if port <= 0 { 1612 return "", 0, fmt.Errorf("invalid port: %q: port must be >0", portLabel) 1613 } 1614 return netStatus.Address, port, nil 1615 1616 default: 1617 // Shouldn't happen due to validation, but enforce invariants 1618 return "", 0, fmt.Errorf("invalid address mode %q", addrMode) 1619 } 1620 }