// Source: github.com/jrxfive/nomad@v0.6.1-0.20170802162750-1fef470e89bf/command/agent/consul/client.go

package consul

import (
	"fmt"
	"log"
	"net"
	"net/url"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	metrics "github.com/armon/go-metrics"
	"github.com/hashicorp/consul/api"
	"github.com/hashicorp/nomad/client/driver"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	// nomadServicePrefix is the first prefix that scopes all Nomad registered
	// services.
	nomadServicePrefix = "_nomad"

	// defaultRetryInterval is how quickly to retry syncing services and
	// checks to Consul when an error occurs. Run multiplies it by the number
	// of consecutive failures, capped at the max retry interval.
	defaultRetryInterval = time.Second

	// defaultMaxRetryInterval is the default max retry interval.
	defaultMaxRetryInterval = 30 * time.Second

	// ttlCheckBuffer is the extra time, on top of a script check's interval,
	// that Nomad may take to report the check result to Consul. It is added
	// to the interval to form the TTL of script checks.
	ttlCheckBuffer = 31 * time.Second

	// defaultShutdownWait is how long Shutdown() should block waiting for
	// enqueued operations to sync to Consul by default.
	defaultShutdownWait = time.Minute

	// DefaultQueryWaitDuration is the max duration the Consul Agent will
	// spend waiting for a response from a Consul Query.
	DefaultQueryWaitDuration = 2 * time.Second

	// ServiceTagHTTP is the tag assigned to HTTP services
	ServiceTagHTTP = "http"

	// ServiceTagRPC is the tag assigned to RPC services
	ServiceTagRPC = "rpc"

	// ServiceTagSerf is the tag assigned to Serf services
	ServiceTagSerf = "serf"
)

// CatalogAPI is the consul/api.Catalog API used by Nomad.
type CatalogAPI interface {
	Datacenters() ([]string, error)
	Service(service, tag string, q *api.QueryOptions) ([]*api.CatalogService, *api.QueryMeta, error)
}

// AgentAPI is the consul/api.Agent API used by Nomad.
62 type AgentAPI interface { 63 Services() (map[string]*api.AgentService, error) 64 Checks() (map[string]*api.AgentCheck, error) 65 CheckRegister(check *api.AgentCheckRegistration) error 66 CheckDeregister(checkID string) error 67 ServiceRegister(service *api.AgentServiceRegistration) error 68 ServiceDeregister(serviceID string) error 69 UpdateTTL(id, output, status string) error 70 } 71 72 // addrParser is usually the Task.FindHostAndPortFor method for turning a 73 // portLabel into an address and port. 74 type addrParser func(portLabel string) (string, int) 75 76 // operations are submitted to the main loop via commit() for synchronizing 77 // with Consul. 78 type operations struct { 79 regServices []*api.AgentServiceRegistration 80 regChecks []*api.AgentCheckRegistration 81 scripts []*scriptCheck 82 83 deregServices []string 84 deregChecks []string 85 } 86 87 // ServiceClient handles task and agent service registration with Consul. 88 type ServiceClient struct { 89 client AgentAPI 90 logger *log.Logger 91 retryInterval time.Duration 92 maxRetryInterval time.Duration 93 94 // skipVerifySupport is true if the local Consul agent suppots TLSSkipVerify 95 skipVerifySupport bool 96 97 // exitCh is closed when the main Run loop exits 98 exitCh chan struct{} 99 100 // shutdownCh is closed when the client should shutdown 101 shutdownCh chan struct{} 102 103 // shutdownWait is how long Shutdown() blocks waiting for the final 104 // sync() to finish. 
Defaults to defaultShutdownWait 105 shutdownWait time.Duration 106 107 opCh chan *operations 108 109 services map[string]*api.AgentServiceRegistration 110 checks map[string]*api.AgentCheckRegistration 111 scripts map[string]*scriptCheck 112 runningScripts map[string]*scriptHandle 113 114 // agent services and checks record entries for the agent itself which 115 // should be removed on shutdown 116 agentServices map[string]struct{} 117 agentChecks map[string]struct{} 118 agentLock sync.Mutex 119 120 // seen is 1 if Consul has ever been seen; otherise 0. Accessed with 121 // atomics. 122 seen int64 123 } 124 125 // NewServiceClient creates a new Consul ServiceClient from an existing Consul API 126 // Client and logger. 127 func NewServiceClient(consulClient AgentAPI, skipVerifySupport bool, logger *log.Logger) *ServiceClient { 128 return &ServiceClient{ 129 client: consulClient, 130 skipVerifySupport: skipVerifySupport, 131 logger: logger, 132 retryInterval: defaultRetryInterval, 133 maxRetryInterval: defaultMaxRetryInterval, 134 exitCh: make(chan struct{}), 135 shutdownCh: make(chan struct{}), 136 shutdownWait: defaultShutdownWait, 137 opCh: make(chan *operations, 8), 138 services: make(map[string]*api.AgentServiceRegistration), 139 checks: make(map[string]*api.AgentCheckRegistration), 140 scripts: make(map[string]*scriptCheck), 141 runningScripts: make(map[string]*scriptHandle), 142 agentServices: make(map[string]struct{}), 143 agentChecks: make(map[string]struct{}), 144 } 145 } 146 147 // seen is used by markSeen and hasSeen 148 const seen = 1 149 150 // markSeen marks Consul as having been seen (meaning at least one operation 151 // has succeeded). 152 func (c *ServiceClient) markSeen() { 153 atomic.StoreInt64(&c.seen, seen) 154 } 155 156 // hasSeen returns true if any Consul operation has ever succeeded. Useful to 157 // squelch errors if Consul isn't running. 
func (c *ServiceClient) hasSeen() bool {
	return atomic.LoadInt64(&c.seen) == seen
}

// Run the Consul main loop which retries operations against Consul. It should
// be called exactly once.
//
// Each iteration waits for a retry tick, a shutdown signal, or a new batch of
// operations, then calls sync(). A failed sync re-arms the retry timer with a
// backoff of retryInterval times the number of consecutive failures, capped
// at maxRetryInterval. exitCh is closed when the loop returns.
func (c *ServiceClient) Run() {
	defer close(c.exitCh)
	retryTimer := time.NewTimer(0)
	<-retryTimer.C // disabled by default
	failures := 0
	for {
		// Block until there is a reason to sync.
		select {
		case <-retryTimer.C:
		case <-c.shutdownCh:
		case ops := <-c.opCh:
			c.merge(ops)
		}

		if err := c.sync(); err != nil {
			// Only the first failure in a row is logged.
			if failures == 0 {
				c.logger.Printf("[WARN] consul.sync: failed to update services in Consul: %v", err)
			}
			failures++
			if !retryTimer.Stop() {
				// Timer already expired, since the timer may
				// or may not have been read in the select{}
				// above, conditionally receive on it
				select {
				case <-retryTimer.C:
				default:
				}
			}
			// Linear backoff, capped at the max retry interval.
			backoff := c.retryInterval * time.Duration(failures)
			if backoff > c.maxRetryInterval {
				backoff = c.maxRetryInterval
			}
			retryTimer.Reset(backoff)
		} else {
			if failures > 0 {
				c.logger.Printf("[INFO] consul.sync: successfully updated services in Consul")
				failures = 0
			}
		}

		// Non-blocking shutdown check after every sync attempt.
		select {
		case <-c.shutdownCh:
			// Exit only after sync'ing all outstanding operations
			if len(c.opCh) > 0 {
				for len(c.opCh) > 0 {
					c.merge(<-c.opCh)
				}
				// Loop again so the merged operations get synced;
				// shutdownCh is closed so the select above won't block.
				continue
			}
			return
		default:
		}

	}
}

// commit operations unless already shutting down.
220 func (c *ServiceClient) commit(ops *operations) { 221 select { 222 case c.opCh <- ops: 223 case <-c.shutdownCh: 224 } 225 } 226 227 // merge registrations into state map prior to sync'ing with Consul 228 func (c *ServiceClient) merge(ops *operations) { 229 for _, s := range ops.regServices { 230 c.services[s.ID] = s 231 } 232 for _, check := range ops.regChecks { 233 c.checks[check.ID] = check 234 } 235 for _, s := range ops.scripts { 236 c.scripts[s.id] = s 237 } 238 for _, sid := range ops.deregServices { 239 delete(c.services, sid) 240 } 241 for _, cid := range ops.deregChecks { 242 if script, ok := c.runningScripts[cid]; ok { 243 script.cancel() 244 delete(c.scripts, cid) 245 delete(c.runningScripts, cid) 246 } 247 delete(c.checks, cid) 248 } 249 metrics.SetGauge([]string{"client", "consul", "services"}, float32(len(c.services))) 250 metrics.SetGauge([]string{"client", "consul", "checks"}, float32(len(c.checks))) 251 metrics.SetGauge([]string{"client", "consul", "script_checks"}, float32(len(c.runningScripts))) 252 } 253 254 // sync enqueued operations. 
255 func (c *ServiceClient) sync() error { 256 sreg, creg, sdereg, cdereg := 0, 0, 0, 0 257 258 consulServices, err := c.client.Services() 259 if err != nil { 260 metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1) 261 return fmt.Errorf("error querying Consul services: %v", err) 262 } 263 264 consulChecks, err := c.client.Checks() 265 if err != nil { 266 metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1) 267 return fmt.Errorf("error querying Consul checks: %v", err) 268 } 269 270 // Remove Nomad services in Consul but unknown locally 271 for id := range consulServices { 272 if _, ok := c.services[id]; ok { 273 // Known service, skip 274 continue 275 } 276 if !isNomadService(id) { 277 // Not managed by Nomad, skip 278 continue 279 } 280 // Unknown Nomad managed service; kill 281 if err := c.client.ServiceDeregister(id); err != nil { 282 metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1) 283 return err 284 } 285 sdereg++ 286 metrics.IncrCounter([]string{"client", "consul", "service_deregisrations"}, 1) 287 } 288 289 // Track services whose ports have changed as their checks may also 290 // need updating 291 portsChanged := make(map[string]struct{}, len(c.services)) 292 293 // Add Nomad services missing from Consul 294 for id, locals := range c.services { 295 if remotes, ok := consulServices[id]; ok { 296 // Make sure Port and Address are stable since 297 // PortLabel and AddressMode aren't included in the 298 // service ID. 
299 if locals.Port == remotes.Port && locals.Address == remotes.Address { 300 // Already exists in Consul; skip 301 continue 302 } 303 // Port changed, reregister it and its checks 304 portsChanged[id] = struct{}{} 305 } 306 if err = c.client.ServiceRegister(locals); err != nil { 307 metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1) 308 return err 309 } 310 sreg++ 311 metrics.IncrCounter([]string{"client", "consul", "service_regisrations"}, 1) 312 } 313 314 // Remove Nomad checks in Consul but unknown locally 315 for id, check := range consulChecks { 316 if _, ok := c.checks[id]; ok { 317 // Known check, leave it 318 continue 319 } 320 if !isNomadService(check.ServiceID) { 321 // Service not managed by Nomad, skip 322 continue 323 } 324 // Unknown Nomad managed check; kill 325 if err := c.client.CheckDeregister(id); err != nil { 326 metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1) 327 return err 328 } 329 cdereg++ 330 metrics.IncrCounter([]string{"client", "consul", "check_deregisrations"}, 1) 331 } 332 333 // Add Nomad checks missing from Consul 334 for id, check := range c.checks { 335 if check, ok := consulChecks[id]; ok { 336 if _, changed := portsChanged[check.ServiceID]; !changed { 337 // Already in Consul and ports didn't change; skipping 338 continue 339 } 340 } 341 if err := c.client.CheckRegister(check); err != nil { 342 metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1) 343 return err 344 } 345 creg++ 346 metrics.IncrCounter([]string{"client", "consul", "check_regisrations"}, 1) 347 348 // Handle starting scripts 349 if script, ok := c.scripts[id]; ok { 350 // If it's already running, cancel and replace 351 if oldScript, running := c.runningScripts[id]; running { 352 oldScript.cancel() 353 } 354 // Start and store the handle 355 c.runningScripts[id] = script.run() 356 } 357 } 358 359 // A Consul operation has succeeded, mark Consul as having been seen 360 c.markSeen() 361 362 
c.logger.Printf("[DEBUG] consul.sync: registered %d services, %d checks; deregistered %d services, %d checks", 363 sreg, creg, sdereg, cdereg) 364 return nil 365 } 366 367 // RegisterAgent registers Nomad agents (client or server). The 368 // Service.PortLabel should be a literal port to be parsed with SplitHostPort. 369 // Script checks are not supported and will return an error. Registration is 370 // asynchronous. 371 // 372 // Agents will be deregistered when Shutdown is called. 373 func (c *ServiceClient) RegisterAgent(role string, services []*structs.Service) error { 374 ops := operations{} 375 376 for _, service := range services { 377 id := makeAgentServiceID(role, service) 378 379 // Unlike tasks, agents don't use port labels. Agent ports are 380 // stored directly in the PortLabel. 381 host, rawport, err := net.SplitHostPort(service.PortLabel) 382 if err != nil { 383 return fmt.Errorf("error parsing port label %q from service %q: %v", service.PortLabel, service.Name, err) 384 } 385 port, err := strconv.Atoi(rawport) 386 if err != nil { 387 return fmt.Errorf("error parsing port %q from service %q: %v", rawport, service.Name, err) 388 } 389 serviceReg := &api.AgentServiceRegistration{ 390 ID: id, 391 Name: service.Name, 392 Tags: service.Tags, 393 Address: host, 394 Port: port, 395 } 396 ops.regServices = append(ops.regServices, serviceReg) 397 398 for _, check := range service.Checks { 399 checkID := makeCheckID(id, check) 400 if check.Type == structs.ServiceCheckScript { 401 return fmt.Errorf("service %q contains invalid check: agent checks do not support scripts", service.Name) 402 } 403 checkHost, checkPort := serviceReg.Address, serviceReg.Port 404 if check.PortLabel != "" { 405 // Unlike tasks, agents don't use port labels. Agent ports are 406 // stored directly in the PortLabel. 
407 host, rawport, err := net.SplitHostPort(check.PortLabel) 408 if err != nil { 409 return fmt.Errorf("error parsing port label %q from check %q: %v", service.PortLabel, check.Name, err) 410 } 411 port, err := strconv.Atoi(rawport) 412 if err != nil { 413 return fmt.Errorf("error parsing port %q from check %q: %v", rawport, check.Name, err) 414 } 415 checkHost, checkPort = host, port 416 } 417 checkReg, err := createCheckReg(id, checkID, check, checkHost, checkPort) 418 if err != nil { 419 return fmt.Errorf("failed to add check %q: %v", check.Name, err) 420 } 421 ops.regChecks = append(ops.regChecks, checkReg) 422 } 423 } 424 425 // Don't bother committing agent checks if we're already shutting down 426 c.agentLock.Lock() 427 defer c.agentLock.Unlock() 428 select { 429 case <-c.shutdownCh: 430 return nil 431 default: 432 } 433 434 // Now add them to the registration queue 435 c.commit(&ops) 436 437 // Record IDs for deregistering on shutdown 438 for _, id := range ops.regServices { 439 c.agentServices[id.ID] = struct{}{} 440 } 441 for _, id := range ops.regChecks { 442 c.agentChecks[id.ID] = struct{}{} 443 } 444 return nil 445 } 446 447 // serviceRegs creates service registrations, check registrations, and script 448 // checks from a service. 
449 func (c *ServiceClient) serviceRegs(ops *operations, allocID string, service *structs.Service, 450 task *structs.Task, exec driver.ScriptExecutor, net *cstructs.DriverNetwork) error { 451 452 id := makeTaskServiceID(allocID, task.Name, service) 453 addrMode := service.AddressMode 454 if addrMode == structs.AddressModeAuto { 455 if net.Advertise() { 456 addrMode = structs.AddressModeDriver 457 } else { 458 // No driver network or shouldn't default to driver's network 459 addrMode = structs.AddressModeHost 460 } 461 } 462 ip, port := task.Resources.Networks.Port(service.PortLabel) 463 if addrMode == structs.AddressModeDriver { 464 if net == nil { 465 return fmt.Errorf("service %s cannot use driver's IP because driver didn't set one", service.Name) 466 } 467 ip = net.IP 468 port = net.PortMap[service.PortLabel] 469 } 470 serviceReg := &api.AgentServiceRegistration{ 471 ID: id, 472 Name: service.Name, 473 Tags: make([]string, len(service.Tags)), 474 Address: ip, 475 Port: port, 476 } 477 // copy isn't strictly necessary but can avoid bugs especially 478 // with tests that may reuse Tasks 479 copy(serviceReg.Tags, service.Tags) 480 ops.regServices = append(ops.regServices, serviceReg) 481 return c.checkRegs(ops, allocID, id, service, task, exec, net) 482 } 483 484 func (c *ServiceClient) checkRegs(ops *operations, allocID, serviceID string, service *structs.Service, 485 task *structs.Task, exec driver.ScriptExecutor, net *cstructs.DriverNetwork) error { 486 487 for _, check := range service.Checks { 488 if check.TLSSkipVerify && !c.skipVerifySupport { 489 c.logger.Printf("[WARN] consul.sync: skipping check %q for task %q alloc %q because Consul doesn't support tls_skip_verify. 
Please upgrade to Consul >= 0.7.2.", 490 check.Name, task.Name, allocID) 491 continue 492 } 493 checkID := makeCheckID(serviceID, check) 494 if check.Type == structs.ServiceCheckScript { 495 if exec == nil { 496 return fmt.Errorf("driver doesn't support script checks") 497 } 498 ops.scripts = append(ops.scripts, newScriptCheck( 499 allocID, task.Name, checkID, check, exec, c.client, c.logger, c.shutdownCh)) 500 501 } 502 503 // Checks should always use the host ip:port 504 portLabel := check.PortLabel 505 if portLabel == "" { 506 // Default to the service's port label 507 portLabel = service.PortLabel 508 } 509 ip, port := task.Resources.Networks.Port(portLabel) 510 checkReg, err := createCheckReg(serviceID, checkID, check, ip, port) 511 if err != nil { 512 return fmt.Errorf("failed to add check %q: %v", check.Name, err) 513 } 514 ops.regChecks = append(ops.regChecks, checkReg) 515 } 516 return nil 517 } 518 519 // RegisterTask with Consul. Adds all sevice entries and checks to Consul. If 520 // exec is nil and a script check exists an error is returned. 521 // 522 // If the service IP is set it used as the address in the service registration. 523 // Checks will always use the IP from the Task struct (host's IP). 524 // 525 // Actual communication with Consul is done asynchrously (see Run). 526 func (c *ServiceClient) RegisterTask(allocID string, task *structs.Task, exec driver.ScriptExecutor, net *cstructs.DriverNetwork) error { 527 ops := &operations{} 528 for _, service := range task.Services { 529 if err := c.serviceRegs(ops, allocID, service, task, exec, net); err != nil { 530 return err 531 } 532 } 533 c.commit(ops) 534 return nil 535 } 536 537 // UpdateTask in Consul. Does not alter the service if only checks have 538 // changed. 539 // 540 // DriverNetwork must not change between invocations for the same allocation. 
541 func (c *ServiceClient) UpdateTask(allocID string, existing, newTask *structs.Task, exec driver.ScriptExecutor, net *cstructs.DriverNetwork) error { 542 ops := &operations{} 543 544 existingIDs := make(map[string]*structs.Service, len(existing.Services)) 545 for _, s := range existing.Services { 546 existingIDs[makeTaskServiceID(allocID, existing.Name, s)] = s 547 } 548 newIDs := make(map[string]*structs.Service, len(newTask.Services)) 549 for _, s := range newTask.Services { 550 newIDs[makeTaskServiceID(allocID, newTask.Name, s)] = s 551 } 552 553 // Loop over existing Service IDs to see if they have been removed or 554 // updated. 555 for existingID, existingSvc := range existingIDs { 556 newSvc, ok := newIDs[existingID] 557 if !ok { 558 // Existing sevice entry removed 559 ops.deregServices = append(ops.deregServices, existingID) 560 for _, check := range existingSvc.Checks { 561 ops.deregChecks = append(ops.deregChecks, makeCheckID(existingID, check)) 562 } 563 continue 564 } 565 566 // PortLabel and AddressMode aren't included in the ID, so we 567 // have to compare manually. 
568 serviceUnchanged := newSvc.PortLabel == existingSvc.PortLabel && newSvc.AddressMode == existingSvc.AddressMode 569 if serviceUnchanged { 570 // Service exists and hasn't changed, don't add it later 571 delete(newIDs, existingID) 572 } 573 574 // Check to see what checks were updated 575 existingChecks := make(map[string]struct{}, len(existingSvc.Checks)) 576 for _, check := range existingSvc.Checks { 577 existingChecks[makeCheckID(existingID, check)] = struct{}{} 578 } 579 580 // Register new checks 581 for _, check := range newSvc.Checks { 582 checkID := makeCheckID(existingID, check) 583 if _, exists := existingChecks[checkID]; exists { 584 // Check exists, so don't remove it 585 delete(existingChecks, checkID) 586 } else if serviceUnchanged { 587 // New check on an unchanged service; add them now 588 err := c.checkRegs(ops, allocID, existingID, newSvc, newTask, exec, net) 589 if err != nil { 590 return err 591 } 592 } 593 } 594 595 // Remove existing checks not in updated service 596 for cid := range existingChecks { 597 ops.deregChecks = append(ops.deregChecks, cid) 598 } 599 } 600 601 // Any remaining services should just be enqueued directly 602 for _, newSvc := range newIDs { 603 err := c.serviceRegs(ops, allocID, newSvc, newTask, exec, net) 604 if err != nil { 605 return err 606 } 607 } 608 609 c.commit(ops) 610 return nil 611 } 612 613 // RemoveTask from Consul. Removes all service entries and checks. 614 // 615 // Actual communication with Consul is done asynchrously (see Run). 
616 func (c *ServiceClient) RemoveTask(allocID string, task *structs.Task) { 617 ops := operations{} 618 619 for _, service := range task.Services { 620 id := makeTaskServiceID(allocID, task.Name, service) 621 ops.deregServices = append(ops.deregServices, id) 622 623 for _, check := range service.Checks { 624 ops.deregChecks = append(ops.deregChecks, makeCheckID(id, check)) 625 } 626 } 627 628 // Now add them to the deregistration fields; main Run loop will update 629 c.commit(&ops) 630 } 631 632 // Checks returns the checks registered against the agent for the given 633 // allocation. 634 func (c *ServiceClient) Checks(a *structs.Allocation) ([]*api.AgentCheck, error) { 635 tg := a.Job.LookupTaskGroup(a.TaskGroup) 636 if tg == nil { 637 return nil, fmt.Errorf("failed to find task group in alloc") 638 } 639 640 // Determine the checks that are relevant 641 relevant := make(map[string]struct{}, 4) 642 for _, task := range tg.Tasks { 643 for _, service := range task.Services { 644 id := makeTaskServiceID(a.ID, task.Name, service) 645 for _, check := range service.Checks { 646 relevant[makeCheckID(id, check)] = struct{}{} 647 } 648 } 649 } 650 651 // Query all the checks 652 checks, err := c.client.Checks() 653 if err != nil { 654 return nil, err 655 } 656 657 allocChecks := make([]*api.AgentCheck, 0, len(relevant)) 658 for checkID := range relevant { 659 if check, ok := checks[checkID]; ok { 660 allocChecks = append(allocChecks, check) 661 } 662 } 663 664 return allocChecks, nil 665 } 666 667 // Shutdown the Consul client. Update running task registations and deregister 668 // agent from Consul. On first call blocks up to shutdownWait before giving up 669 // on syncing operations. 670 func (c *ServiceClient) Shutdown() error { 671 // Serialize Shutdown calls with RegisterAgent to prevent leaking agent 672 // entries. 
673 c.agentLock.Lock() 674 defer c.agentLock.Unlock() 675 select { 676 case <-c.shutdownCh: 677 return nil 678 default: 679 close(c.shutdownCh) 680 } 681 682 // Give run loop time to sync, but don't block indefinitely 683 deadline := time.After(c.shutdownWait) 684 685 // Wait for Run to finish any outstanding operations and exit 686 select { 687 case <-c.exitCh: 688 case <-deadline: 689 // Don't wait forever though 690 } 691 692 // If Consul was never seen nothing could be written so exit early 693 if !c.hasSeen() { 694 return nil 695 } 696 697 // Always attempt to deregister Nomad agent Consul entries, even if 698 // deadline was reached 699 for id := range c.agentServices { 700 if err := c.client.ServiceDeregister(id); err != nil { 701 c.logger.Printf("[ERR] consul.sync: error deregistering agent service (id: %q): %v", id, err) 702 } 703 } 704 for id := range c.agentChecks { 705 if err := c.client.CheckDeregister(id); err != nil { 706 c.logger.Printf("[ERR] consul.sync: error deregistering agent service (id: %q): %v", id, err) 707 } 708 } 709 710 // Give script checks time to exit (no need to lock as Run() has exited) 711 for _, h := range c.runningScripts { 712 select { 713 case <-h.wait(): 714 case <-deadline: 715 return fmt.Errorf("timed out waiting for script checks to run") 716 } 717 } 718 return nil 719 } 720 721 // makeAgentServiceID creates a unique ID for identifying an agent service in 722 // Consul. 
723 // 724 // Agent service IDs are of the form: 725 // 726 // {nomadServicePrefix}-{ROLE}-{Service.Name}-{Service.Tags...} 727 // Example Server ID: _nomad-server-nomad-serf 728 // Example Client ID: _nomad-client-nomad-client-http 729 // 730 func makeAgentServiceID(role string, service *structs.Service) string { 731 parts := make([]string, len(service.Tags)+3) 732 parts[0] = nomadServicePrefix 733 parts[1] = role 734 parts[2] = service.Name 735 copy(parts[3:], service.Tags) 736 return strings.Join(parts, "-") 737 } 738 739 // makeTaskServiceID creates a unique ID for identifying a task service in 740 // Consul. 741 // 742 // Task service IDs are of the form: 743 // 744 // {nomadServicePrefix}-executor-{ALLOC_ID}-{Service.Name}-{Service.Tags...} 745 // Example Service ID: _nomad-executor-1234-echo-http-tag1-tag2-tag3 746 // 747 func makeTaskServiceID(allocID, taskName string, service *structs.Service) string { 748 parts := make([]string, len(service.Tags)+5) 749 parts[0] = nomadServicePrefix 750 parts[1] = "executor" 751 parts[2] = allocID 752 parts[3] = taskName 753 parts[4] = service.Name 754 copy(parts[5:], service.Tags) 755 return strings.Join(parts, "-") 756 } 757 758 // makeCheckID creates a unique ID for a check. 759 func makeCheckID(serviceID string, check *structs.ServiceCheck) string { 760 return check.Hash(serviceID) 761 } 762 763 // createCheckReg creates a Check that can be registered with Consul. 764 // 765 // Script checks simply have a TTL set and the caller is responsible for 766 // running the script and heartbeating. 
767 func createCheckReg(serviceID, checkID string, check *structs.ServiceCheck, host string, port int) (*api.AgentCheckRegistration, error) { 768 chkReg := api.AgentCheckRegistration{ 769 ID: checkID, 770 Name: check.Name, 771 ServiceID: serviceID, 772 } 773 chkReg.Status = check.InitialStatus 774 chkReg.Timeout = check.Timeout.String() 775 chkReg.Interval = check.Interval.String() 776 777 switch check.Type { 778 case structs.ServiceCheckHTTP: 779 proto := check.Protocol 780 if proto == "" { 781 proto = "http" 782 } 783 if check.TLSSkipVerify { 784 chkReg.TLSSkipVerify = true 785 } 786 base := url.URL{ 787 Scheme: proto, 788 Host: net.JoinHostPort(host, strconv.Itoa(port)), 789 } 790 relative, err := url.Parse(check.Path) 791 if err != nil { 792 return nil, err 793 } 794 url := base.ResolveReference(relative) 795 chkReg.HTTP = url.String() 796 case structs.ServiceCheckTCP: 797 chkReg.TCP = net.JoinHostPort(host, strconv.Itoa(port)) 798 case structs.ServiceCheckScript: 799 chkReg.TTL = (check.Interval + ttlCheckBuffer).String() 800 default: 801 return nil, fmt.Errorf("check type %+q not valid", check.Type) 802 } 803 return &chkReg, nil 804 } 805 806 // isNomadService returns true if the ID matches the pattern of a Nomad managed 807 // service. Agent services return false as independent client and server agents 808 // may be running on the same machine. #2827 809 func isNomadService(id string) bool { 810 const prefix = nomadServicePrefix + "-executor" 811 return strings.HasPrefix(id, prefix) 812 }