github.com/mongey/nomad@v0.5.2/command/agent/consul/syncer.go (about) 1 // Package consul is used by Nomad to register all services both static services 2 // and dynamic via allocations. 3 // 4 // Consul Service IDs have the following format: ${nomadServicePrefix}-${groupName}-${serviceKey} 5 // groupName takes on one of the following values: 6 // - server 7 // - client 8 // - executor-${alloc-id}-${task-name} 9 // 10 // serviceKey should be generated by service registrators. 11 // If the serviceKey is being generated by the executor for a Nomad Task.Services 12 // the following helper should be used: 13 // NOTE: Executor should interpolate the service prior to calling 14 // func GenerateTaskServiceKey(service *structs.Service) string 15 // 16 // The Nomad Client reaps services registered from dead allocations that were 17 // not properly cleaned up by the executor (this is not the expected case). 18 // 19 // TODO fix this comment 20 // The Consul ServiceIDs generated by the executor will contain the allocation 21 // ID. Thus the client can generate the list of Consul ServiceIDs to keep by 22 // calling the following method on all running allocations the client is aware 23 // of: 24 // func GenerateExecutorServiceKeyPrefixFromAlloc(allocID string) string 25 package consul 26 27 import ( 28 "fmt" 29 "log" 30 "net" 31 "net/url" 32 "strconv" 33 "strings" 34 "sync" 35 "time" 36 37 consul "github.com/hashicorp/consul/api" 38 "github.com/hashicorp/go-multierror" 39 40 "github.com/hashicorp/nomad/nomad/structs" 41 "github.com/hashicorp/nomad/nomad/structs/config" 42 "github.com/hashicorp/nomad/nomad/types" 43 ) 44 45 const ( 46 // initialSyncBuffer is the max time an initial sync will sleep 47 // before syncing. 48 initialSyncBuffer = 30 * time.Second 49 50 // initialSyncDelay is the delay before an initial sync. 51 initialSyncDelay = 5 * time.Second 52 53 // nomadServicePrefix is the first prefix that scopes all Nomad registered 54 // services 55 nomadServicePrefix = "_nomad" 56 57 // The periodic time interval for syncing services and checks with Consul 58 defaultSyncInterval = 6 * time.Second 59 60 // defaultSyncJitter provides a little variance in the frequency at which 61 // Syncer polls Consul. 62 defaultSyncJitter = time.Second 63 64 // ttlCheckBuffer is the time interval that Nomad can take to report Consul 65 // the check result 66 ttlCheckBuffer = 31 * time.Second 67 68 // DefaultQueryWaitDuration is the max duration the Consul Agent will 69 // spend waiting for a response from a Consul Query. 70 DefaultQueryWaitDuration = 2 * time.Second 71 72 // ServiceTagHTTP is the tag assigned to HTTP services 73 ServiceTagHTTP = "http" 74 75 // ServiceTagRPC is the tag assigned to RPC services 76 ServiceTagRPC = "rpc" 77 78 // ServiceTagSerf is the tag assigned to Serf services 79 ServiceTagSerf = "serf" 80 ) 81 82 // consulServiceID and consulCheckID are the IDs registered with Consul 83 type consulServiceID string 84 type consulCheckID string 85 86 // ServiceKey is the generated service key that is used to build the Consul 87 // ServiceID 88 type ServiceKey string 89 90 // ServiceDomain is the domain of services registered by Nomad 91 type ServiceDomain string 92 93 const ( 94 ClientDomain ServiceDomain = "client" 95 ServerDomain ServiceDomain = "server" 96 ) 97 98 // NewExecutorDomain returns a domain specific to the alloc ID and task 99 func NewExecutorDomain(allocID, task string) ServiceDomain { 100 return ServiceDomain(fmt.Sprintf("executor-%s-%s", allocID, task)) 101 } 102 103 // Syncer allows syncing of services and checks with Consul 104 type Syncer struct { 105 client *consul.Client 106 consulAvailable bool 107 108 // servicesGroups and checkGroups are named groups of services and checks 109 // respectively that will be flattened and reconciled with Consul when 110 // SyncServices() is called. The key to the servicesGroups map is unique 111 // per handler and is used to allow the Agent's services to be maintained 112 // independently of the Client or Server's services. 113 servicesGroups map[ServiceDomain]map[ServiceKey]*consul.AgentServiceRegistration 114 checkGroups map[ServiceDomain]map[ServiceKey][]*consul.AgentCheckRegistration 115 groupsLock sync.RWMutex 116 117 // The "Consul Registry" is a collection of Consul Services and 118 // Checks all guarded by the registryLock. 119 registryLock sync.RWMutex 120 121 // checkRunners are delegated Consul checks being ran by the Syncer 122 checkRunners map[consulCheckID]*CheckRunner 123 124 addrFinder func(portLabel string) (string, int) 125 createDelegatedCheck func(*structs.ServiceCheck, string) (Check, error) 126 delegateChecks map[string]struct{} // delegateChecks are the checks that the Nomad client runs and reports to Consul 127 // End registryLock guarded attributes. 128 129 logger *log.Logger 130 131 shutdownCh chan struct{} 132 shutdown bool 133 shutdownLock sync.Mutex 134 135 // notifyShutdownCh is used to notify a Syncer it needs to shutdown. 136 // This can happen because there was an explicit call to the Syncer's 137 // Shutdown() method, or because the calling task signaled the 138 // program is going to exit by closing its shutdownCh. 139 notifyShutdownCh chan struct{} 140 141 // periodicCallbacks is walked sequentially when the timer in Run 142 // fires. 143 periodicCallbacks map[string]types.PeriodicCallback 144 notifySyncCh chan struct{} 145 periodicLock sync.RWMutex 146 147 // The periodic time interval for syncing services and checks with Consul 148 syncInterval time.Duration 149 150 // syncJitter provides a little variance in the frequency at which 151 // Syncer polls Consul. 152 syncJitter time.Duration 153 } 154 155 // NewSyncer returns a new consul.Syncer 156 func NewSyncer(consulConfig *config.ConsulConfig, shutdownCh chan struct{}, logger *log.Logger) (*Syncer, error) { 157 var consulClientConfig *consul.Config 158 var err error 159 consulClientConfig, err = consulConfig.ApiConfig() 160 if err != nil { 161 return nil, err 162 } 163 164 var consulClient *consul.Client 165 if consulClient, err = consul.NewClient(consulClientConfig); err != nil { 166 return nil, err 167 } 168 consulSyncer := Syncer{ 169 client: consulClient, 170 logger: logger, 171 consulAvailable: true, 172 shutdownCh: shutdownCh, 173 servicesGroups: make(map[ServiceDomain]map[ServiceKey]*consul.AgentServiceRegistration), 174 checkGroups: make(map[ServiceDomain]map[ServiceKey][]*consul.AgentCheckRegistration), 175 checkRunners: make(map[consulCheckID]*CheckRunner), 176 periodicCallbacks: make(map[string]types.PeriodicCallback), 177 notifySyncCh: make(chan struct{}, 1), 178 // default noop implementation of addrFinder 179 addrFinder: func(string) (string, int) { return "", 0 }, 180 syncInterval: defaultSyncInterval, 181 syncJitter: defaultSyncJitter, 182 } 183 184 return &consulSyncer, nil 185 } 186 187 // SetDelegatedChecks sets the checks that nomad is going to run and report the 188 // result back to consul 189 func (c *Syncer) SetDelegatedChecks(delegateChecks map[string]struct{}, createDelegatedCheckFn func(*structs.ServiceCheck, string) (Check, error)) *Syncer { 190 c.delegateChecks = delegateChecks 191 c.createDelegatedCheck = createDelegatedCheckFn 192 return c 193 } 194 195 // SetAddrFinder sets a function to find the host and port for a Service given its port label 196 func (c *Syncer) SetAddrFinder(addrFinder func(string) (string, int)) *Syncer { 197 c.addrFinder = addrFinder 198 return c 199 } 200 201 // GenerateServiceKey should be called to generate a serviceKey based on the 202 // Service. 203 func GenerateServiceKey(service *structs.Service) ServiceKey { 204 var key string 205 numTags := len(service.Tags) 206 switch numTags { 207 case 0: 208 key = fmt.Sprintf("%s", service.Name) 209 default: 210 tags := strings.Join(service.Tags, "-") 211 key = fmt.Sprintf("%s-%s", service.Name, tags) 212 } 213 return ServiceKey(key) 214 } 215 216 // SetServices stores the map of Nomad Services to the provided service 217 // domain name. 218 func (c *Syncer) SetServices(domain ServiceDomain, services map[ServiceKey]*structs.Service) error { 219 var mErr multierror.Error 220 numServ := len(services) 221 registeredServices := make(map[ServiceKey]*consul.AgentServiceRegistration, numServ) 222 registeredChecks := make(map[ServiceKey][]*consul.AgentCheckRegistration, numServ) 223 for serviceKey, service := range services { 224 serviceReg, err := c.createService(service, domain, serviceKey) 225 if err != nil { 226 mErr.Errors = append(mErr.Errors, err) 227 continue 228 } 229 registeredServices[serviceKey] = serviceReg 230 231 // Register the check(s) for this service 232 for _, chk := range service.Checks { 233 // Create a Consul check registration 234 chkReg, err := c.createCheckReg(chk, serviceReg) 235 if err != nil { 236 mErr.Errors = append(mErr.Errors, err) 237 continue 238 } 239 240 // creating a nomad check if we have to handle this particular check type 241 c.registryLock.RLock() 242 if _, ok := c.delegateChecks[chk.Type]; ok { 243 _, ok := c.checkRunners[consulCheckID(chkReg.ID)] 244 c.registryLock.RUnlock() 245 if ok { 246 continue 247 } 248 249 nc, err := c.createDelegatedCheck(chk, chkReg.ID) 250 if err != nil { 251 mErr.Errors = append(mErr.Errors, err) 252 continue 253 } 254 255 cr := NewCheckRunner(nc, c.runCheck, c.logger) 256 c.registryLock.Lock() 257 // TODO type the CheckRunner 258 c.checkRunners[consulCheckID(nc.ID())] = cr 259 c.registryLock.Unlock() 260 } else { 261 c.registryLock.RUnlock() 262 } 263 264 registeredChecks[serviceKey] = append(registeredChecks[serviceKey], chkReg) 265 } 266 } 267 268 if len(mErr.Errors) > 0 { 269 return mErr.ErrorOrNil() 270 } 271 272 // Update the services and checks groups for this domain 273 c.groupsLock.Lock() 274 275 // Create map for service group if it doesn't exist 276 serviceKeys, ok := c.servicesGroups[domain] 277 if !ok { 278 serviceKeys = make(map[ServiceKey]*consul.AgentServiceRegistration, len(registeredServices)) 279 c.servicesGroups[domain] = serviceKeys 280 } 281 282 // Remove stale services 283 for existingServiceKey := range serviceKeys { 284 if _, ok := registeredServices[existingServiceKey]; !ok { 285 // Exisitng service needs to be removed 286 delete(serviceKeys, existingServiceKey) 287 } 288 } 289 290 // Add registered services 291 for serviceKey, service := range registeredServices { 292 serviceKeys[serviceKey] = service 293 } 294 295 // Create map for check group if it doesn't exist 296 checkKeys, ok := c.checkGroups[domain] 297 if !ok { 298 checkKeys = make(map[ServiceKey][]*consul.AgentCheckRegistration, len(registeredChecks)) 299 c.checkGroups[domain] = checkKeys 300 } 301 302 // Remove stale checks 303 for existingCheckKey := range checkKeys { 304 if _, ok := registeredChecks[existingCheckKey]; !ok { 305 // Exisitng check needs to be removed 306 delete(checkKeys, existingCheckKey) 307 } 308 } 309 310 // Add registered checks 311 for checkKey, checks := range registeredChecks { 312 checkKeys[checkKey] = checks 313 } 314 c.groupsLock.Unlock() 315 316 // Sync immediately 317 c.SyncNow() 318 319 return nil 320 } 321 322 // SyncNow expires the current timer forcing the list of periodic callbacks 323 // to be synced immediately. 324 func (c *Syncer) SyncNow() { 325 select { 326 case c.notifySyncCh <- struct{}{}: 327 default: 328 } 329 } 330 331 // flattenedServices returns a flattened list of services that are registered 332 // locally 333 func (c *Syncer) flattenedServices() []*consul.AgentServiceRegistration { 334 const initialNumServices = 8 335 services := make([]*consul.AgentServiceRegistration, 0, initialNumServices) 336 c.groupsLock.RLock() 337 defer c.groupsLock.RUnlock() 338 for _, servicesGroup := range c.servicesGroups { 339 for _, service := range servicesGroup { 340 services = append(services, service) 341 } 342 } 343 return services 344 } 345 346 // flattenedChecks returns a flattened list of checks that are registered 347 // locally 348 func (c *Syncer) flattenedChecks() []*consul.AgentCheckRegistration { 349 const initialNumChecks = 8 350 checks := make([]*consul.AgentCheckRegistration, 0, initialNumChecks) 351 c.groupsLock.RLock() 352 for _, checkGroup := range c.checkGroups { 353 for _, check := range checkGroup { 354 checks = append(checks, check...) 355 } 356 } 357 c.groupsLock.RUnlock() 358 return checks 359 } 360 361 func (c *Syncer) signalShutdown() { 362 select { 363 case c.notifyShutdownCh <- struct{}{}: 364 default: 365 } 366 } 367 368 // Shutdown de-registers the services and checks and shuts down periodic syncing 369 func (c *Syncer) Shutdown() error { 370 var mErr multierror.Error 371 372 c.shutdownLock.Lock() 373 if !c.shutdown { 374 c.shutdown = true 375 } 376 c.shutdownLock.Unlock() 377 378 c.signalShutdown() 379 380 // Stop all the checks that nomad is running 381 c.registryLock.RLock() 382 defer c.registryLock.RUnlock() 383 for _, cr := range c.checkRunners { 384 cr.Stop() 385 } 386 387 // De-register all the services registered by this syncer from Consul 388 services, err := c.queryAgentServices() 389 if err != nil { 390 mErr.Errors = append(mErr.Errors, err) 391 } 392 for serviceID := range services { 393 convertedID := string(serviceID) 394 if err := c.client.Agent().ServiceDeregister(convertedID); err != nil { 395 c.logger.Printf("[WARN] consul.syncer: failed to deregister service ID %+q: %v", convertedID, err) 396 mErr.Errors = append(mErr.Errors, err) 397 } 398 } 399 return mErr.ErrorOrNil() 400 } 401 402 // queryChecks queries the Consul Agent for a list of Consul checks that 403 // have been registered with this Consul Syncer. 404 func (c *Syncer) queryChecks() (map[consulCheckID]*consul.AgentCheck, error) { 405 checks, err := c.client.Agent().Checks() 406 if err != nil { 407 return nil, err 408 } 409 return c.filterConsulChecks(checks), nil 410 } 411 412 // queryAgentServices queries the Consul Agent for a list of Consul services that 413 // have been registered with this Consul Syncer. 414 func (c *Syncer) queryAgentServices() (map[consulServiceID]*consul.AgentService, error) { 415 services, err := c.client.Agent().Services() 416 if err != nil { 417 return nil, err 418 } 419 return c.filterConsulServices(services), nil 420 } 421 422 // syncChecks synchronizes this Syncer's Consul Checks with the Consul Agent. 423 func (c *Syncer) syncChecks() error { 424 var mErr multierror.Error 425 consulChecks, err := c.queryChecks() 426 if err != nil { 427 return err 428 } 429 430 // Synchronize checks with Consul 431 missingChecks, existingChecks, changedChecks, staleChecks := c.calcChecksDiff(consulChecks) 432 for _, check := range missingChecks { 433 if err := c.registerCheck(check); err != nil { 434 mErr.Errors = append(mErr.Errors, err) 435 } 436 } 437 for _, check := range existingChecks { 438 c.ensureCheckRunning(check) 439 } 440 for _, check := range changedChecks { 441 // NOTE(sean@): Do we need to deregister the check before 442 // re-registering it? Not deregistering to avoid missing the 443 // TTL but doesn't correct reconcile any possible drift with 444 // the check. 445 // 446 // if err := c.deregisterCheck(check.ID); err != nil { 447 // mErr.Errors = append(mErr.Errors, err) 448 // } 449 if err := c.registerCheck(check); err != nil { 450 mErr.Errors = append(mErr.Errors, err) 451 } 452 } 453 for _, check := range staleChecks { 454 if err := c.deregisterCheck(consulCheckID(check.ID)); err != nil { 455 mErr.Errors = append(mErr.Errors, err) 456 } 457 } 458 return mErr.ErrorOrNil() 459 } 460 461 // compareConsulCheck takes a consul.AgentCheckRegistration instance and 462 // compares it with a consul.AgentCheck. Returns true if they are equal 463 // according to consul.AgentCheck, otherwise false. 464 func compareConsulCheck(localCheck *consul.AgentCheckRegistration, consulCheck *consul.AgentCheck) bool { 465 if consulCheck.CheckID != localCheck.ID || 466 consulCheck.Name != localCheck.Name || 467 consulCheck.Notes != localCheck.Notes || 468 consulCheck.ServiceID != localCheck.ServiceID { 469 return false 470 } 471 return true 472 } 473 474 // calcChecksDiff takes the argument (consulChecks) and calculates the delta 475 // between the consul.Syncer's list of known checks (c.flattenedChecks()). 476 // Four arrays are returned: 477 // 478 // 1) a slice of checks that exist only locally in the Syncer and are missing 479 // from the Consul Agent (consulChecks) and therefore need to be registered. 480 // 481 // 2) a slice of checks that exist in both the local consul.Syncer's 482 // tracked list and Consul Agent (consulChecks). 483 // 484 // 3) a slice of checks that exist in both the local consul.Syncer's 485 // tracked list and Consul Agent (consulServices) but have diverged state. 486 // 487 // 4) a slice of checks that exist only in the Consul Agent (consulChecks) 488 // and should be removed because the Consul Agent has drifted from the 489 // Syncer. 490 func (c *Syncer) calcChecksDiff(consulChecks map[consulCheckID]*consul.AgentCheck) ( 491 missingChecks []*consul.AgentCheckRegistration, 492 equalChecks []*consul.AgentCheckRegistration, 493 changedChecks []*consul.AgentCheckRegistration, 494 staleChecks []*consul.AgentCheckRegistration) { 495 496 type mergedCheck struct { 497 check *consul.AgentCheckRegistration 498 // 'l' == Nomad local only 499 // 'e' == equal 500 // 'c' == changed 501 // 'a' == Consul agent only 502 state byte 503 } 504 var ( 505 localChecksCount = 0 506 equalChecksCount = 0 507 changedChecksCount = 0 508 agentChecks = 0 509 ) 510 flattenedChecks := c.flattenedChecks() 511 localChecks := make(map[string]*mergedCheck, len(flattenedChecks)+len(consulChecks)) 512 for _, localCheck := range flattenedChecks { 513 localChecksCount++ 514 localChecks[localCheck.ID] = &mergedCheck{localCheck, 'l'} 515 } 516 for _, consulCheck := range consulChecks { 517 if localCheck, found := localChecks[consulCheck.CheckID]; found { 518 localChecksCount-- 519 if compareConsulCheck(localCheck.check, consulCheck) { 520 equalChecksCount++ 521 localChecks[consulCheck.CheckID].state = 'e' 522 } else { 523 changedChecksCount++ 524 localChecks[consulCheck.CheckID].state = 'c' 525 } 526 } else { 527 agentChecks++ 528 agentCheckReg := &consul.AgentCheckRegistration{ 529 ID: consulCheck.CheckID, 530 Name: consulCheck.Name, 531 Notes: consulCheck.Notes, 532 ServiceID: consulCheck.ServiceID, 533 } 534 localChecks[consulCheck.CheckID] = &mergedCheck{agentCheckReg, 'a'} 535 } 536 } 537 538 missingChecks = make([]*consul.AgentCheckRegistration, 0, localChecksCount) 539 equalChecks = make([]*consul.AgentCheckRegistration, 0, equalChecksCount) 540 changedChecks = make([]*consul.AgentCheckRegistration, 0, changedChecksCount) 541 staleChecks = make([]*consul.AgentCheckRegistration, 0, agentChecks) 542 for _, check := range localChecks { 543 switch check.state { 544 case 'l': 545 missingChecks = append(missingChecks, check.check) 546 case 'e': 547 equalChecks = append(equalChecks, check.check) 548 case 'c': 549 changedChecks = append(changedChecks, check.check) 550 case 'a': 551 staleChecks = append(staleChecks, check.check) 552 } 553 } 554 555 return missingChecks, equalChecks, changedChecks, staleChecks 556 } 557 558 // compareConsulService takes a consul.AgentServiceRegistration instance and 559 // compares it with a consul.AgentService. Returns true if they are equal 560 // according to consul.AgentService, otherwise false. 561 func compareConsulService(localService *consul.AgentServiceRegistration, consulService *consul.AgentService) bool { 562 if consulService.ID != localService.ID || 563 consulService.Service != localService.Name || 564 consulService.Port != localService.Port || 565 consulService.Address != localService.Address || 566 consulService.EnableTagOverride != localService.EnableTagOverride { 567 return false 568 } 569 570 serviceTags := make(map[string]byte, len(localService.Tags)) 571 for _, tag := range localService.Tags { 572 serviceTags[tag] = 'l' 573 } 574 for _, tag := range consulService.Tags { 575 if _, found := serviceTags[tag]; !found { 576 return false 577 } 578 serviceTags[tag] = 'b' 579 } 580 for _, state := range serviceTags { 581 if state == 'l' { 582 return false 583 } 584 } 585 586 return true 587 } 588 589 // calcServicesDiff takes the argument (consulServices) and calculates the 590 // delta between the consul.Syncer's list of known services 591 // (c.flattenedServices()). Four arrays are returned: 592 // 593 // 1) a slice of services that exist only locally in the Syncer and are 594 // missing from the Consul Agent (consulServices) and therefore need to be 595 // registered. 596 // 597 // 2) a slice of services that exist in both the local consul.Syncer's 598 // tracked list and Consul Agent (consulServices) *AND* are identical. 599 // 600 // 3) a slice of services that exist in both the local consul.Syncer's 601 // tracked list and Consul Agent (consulServices) but have diverged state. 602 // 603 // 4) a slice of services that exist only in the Consul Agent 604 // (consulServices) and should be removed because the Consul Agent has 605 // drifted from the Syncer. 606 func (c *Syncer) calcServicesDiff(consulServices map[consulServiceID]*consul.AgentService) (missingServices []*consul.AgentServiceRegistration, equalServices []*consul.AgentServiceRegistration, changedServices []*consul.AgentServiceRegistration, staleServices []*consul.AgentServiceRegistration) { 607 type mergedService struct { 608 service *consul.AgentServiceRegistration 609 // 'l' == Nomad local only 610 // 'e' == equal 611 // 'c' == changed 612 // 'a' == Consul agent only 613 state byte 614 } 615 var ( 616 localServicesCount = 0 617 equalServicesCount = 0 618 changedServicesCount = 0 619 agentServices = 0 620 ) 621 flattenedServices := c.flattenedServices() 622 localServices := make(map[string]*mergedService, len(flattenedServices)+len(consulServices)) 623 for _, localService := range flattenedServices { 624 localServicesCount++ 625 localServices[localService.ID] = &mergedService{localService, 'l'} 626 } 627 for _, consulService := range consulServices { 628 if localService, found := localServices[consulService.ID]; found { 629 localServicesCount-- 630 if compareConsulService(localService.service, consulService) { 631 equalServicesCount++ 632 localServices[consulService.ID].state = 'e' 633 } else { 634 changedServicesCount++ 635 localServices[consulService.ID].state = 'c' 636 } 637 } else { 638 agentServices++ 639 agentServiceReg := &consul.AgentServiceRegistration{ 640 ID: consulService.ID, 641 Name: consulService.Service, 642 Tags: consulService.Tags, 643 Port: consulService.Port, 644 Address: consulService.Address, 645 } 646 localServices[consulService.ID] = &mergedService{agentServiceReg, 'a'} 647 } 648 } 649 650 missingServices = make([]*consul.AgentServiceRegistration, 0, localServicesCount) 651 equalServices = make([]*consul.AgentServiceRegistration, 0, equalServicesCount) 652 changedServices = make([]*consul.AgentServiceRegistration, 0, changedServicesCount) 653 staleServices = make([]*consul.AgentServiceRegistration, 0, agentServices) 654 for _, service := range localServices { 655 switch service.state { 656 case 'l': 657 missingServices = append(missingServices, service.service) 658 case 'e': 659 equalServices = append(equalServices, service.service) 660 case 'c': 661 changedServices = append(changedServices, service.service) 662 case 'a': 663 staleServices = append(staleServices, service.service) 664 } 665 } 666 667 return missingServices, equalServices, changedServices, staleServices 668 } 669 670 // syncServices synchronizes this Syncer's Consul Services with the Consul 671 // Agent. 672 func (c *Syncer) syncServices() error { 673 consulServices, err := c.queryAgentServices() 674 if err != nil { 675 return err 676 } 677 678 // Synchronize services with Consul 679 var mErr multierror.Error 680 missingServices, _, changedServices, removedServices := c.calcServicesDiff(consulServices) 681 for _, service := range missingServices { 682 if err := c.client.Agent().ServiceRegister(service); err != nil { 683 mErr.Errors = append(mErr.Errors, err) 684 } 685 } 686 for _, service := range changedServices { 687 // Re-register the local service 688 if err := c.client.Agent().ServiceRegister(service); err != nil { 689 mErr.Errors = append(mErr.Errors, err) 690 } 691 } 692 for _, service := range removedServices { 693 if err := c.deregisterService(service.ID); err != nil { 694 mErr.Errors = append(mErr.Errors, err) 695 } 696 } 697 return mErr.ErrorOrNil() 698 } 699 700 // registerCheck registers a check definition with Consul 701 func (c *Syncer) registerCheck(chkReg *consul.AgentCheckRegistration) error { 702 c.registryLock.RLock() 703 if cr, ok := c.checkRunners[consulCheckID(chkReg.ID)]; ok { 704 cr.Start() 705 } 706 c.registryLock.RUnlock() 707 return c.client.Agent().CheckRegister(chkReg) 708 } 709 710 // ensureCheckRunning starts the check runner for a check if it's not already running 711 func (c *Syncer) ensureCheckRunning(chk *consul.AgentCheckRegistration) { 712 c.registryLock.RLock() 713 defer c.registryLock.RUnlock() 714 if cr, ok := c.checkRunners[consulCheckID(chk.ID)]; ok && !cr.Started() { 715 c.logger.Printf("[DEBUG] consul.syncer: starting runner for existing check. %v", chk.ID) 716 cr.Start() 717 } 718 } 719 720 // createCheckReg creates a Check that can be registered with Nomad. It also 721 // creates a Nomad check for the check types that it can handle. 722 func (c *Syncer) createCheckReg(check *structs.ServiceCheck, serviceReg *consul.AgentServiceRegistration) (*consul.AgentCheckRegistration, error) { 723 chkReg := consul.AgentCheckRegistration{ 724 ID: check.Hash(serviceReg.ID), 725 Name: check.Name, 726 ServiceID: serviceReg.ID, 727 } 728 chkReg.Timeout = check.Timeout.String() 729 chkReg.Interval = check.Interval.String() 730 host, port := serviceReg.Address, serviceReg.Port 731 if check.PortLabel != "" { 732 host, port = c.addrFinder(check.PortLabel) 733 } 734 switch check.Type { 735 case structs.ServiceCheckHTTP: 736 if check.Protocol == "" { 737 check.Protocol = "http" 738 } 739 base := url.URL{ 740 Scheme: check.Protocol, 741 Host: net.JoinHostPort(host, strconv.Itoa(port)), 742 } 743 relative, err := url.Parse(check.Path) 744 if err != nil { 745 return nil, err 746 } 747 url := base.ResolveReference(relative) 748 chkReg.HTTP = url.String() 749 case structs.ServiceCheckTCP: 750 chkReg.TCP = net.JoinHostPort(host, strconv.Itoa(port)) 751 case structs.ServiceCheckScript: 752 chkReg.TTL = (check.Interval + ttlCheckBuffer).String() 753 default: 754 return nil, fmt.Errorf("check type %+q not valid", check.Type) 755 } 756 chkReg.Status = check.InitialStatus 757 return &chkReg, nil 758 } 759 760 // generateConsulServiceID takes the domain and service key and returns a Consul 761 // ServiceID 762 func generateConsulServiceID(domain ServiceDomain, key ServiceKey) consulServiceID { 763 return consulServiceID(fmt.Sprintf("%s-%s-%s", nomadServicePrefix, domain, key)) 764 } 765 766 // createService creates a Consul AgentService from a Nomad ConsulService. 767 func (c *Syncer) createService(service *structs.Service, domain ServiceDomain, key ServiceKey) (*consul.AgentServiceRegistration, error) { 768 c.registryLock.RLock() 769 defer c.registryLock.RUnlock() 770 771 srv := consul.AgentServiceRegistration{ 772 ID: string(generateConsulServiceID(domain, key)), 773 Name: service.Name, 774 Tags: service.Tags, 775 } 776 host, port := c.addrFinder(service.PortLabel) 777 if host != "" { 778 srv.Address = host 779 } 780 781 if port != 0 { 782 srv.Port = port 783 } 784 785 return &srv, nil 786 } 787 788 // deregisterService de-registers a service with the given ID from consul 789 func (c *Syncer) deregisterService(serviceID string) error { 790 return c.client.Agent().ServiceDeregister(serviceID) 791 } 792 793 // deregisterCheck de-registers a check from Consul 794 func (c *Syncer) deregisterCheck(id consulCheckID) error { 795 c.registryLock.Lock() 796 defer c.registryLock.Unlock() 797 798 // Deleting from Consul Agent 799 if err := c.client.Agent().CheckDeregister(string(id)); err != nil { 800 // CheckDeregister() will be reattempted again in a future 801 // sync. 802 return err 803 } 804 805 // Remove the check from the local registry 806 if cr, ok := c.checkRunners[id]; ok { 807 cr.Stop() 808 delete(c.checkRunners, id) 809 } 810 811 return nil 812 } 813 814 // Run triggers periodic syncing of services and checks with Consul. This is 815 // a long lived go-routine which is stopped during shutdown. 816 func (c *Syncer) Run() { 817 sync := time.NewTimer(0) 818 for { 819 select { 820 case <-sync.C: 821 d := c.syncInterval - c.syncJitter 822 sync.Reset(d) 823 824 if err := c.SyncServices(); err != nil { 825 if c.consulAvailable { 826 c.logger.Printf("[DEBUG] consul.syncer: error in syncing: %v", err) 827 } 828 c.consulAvailable = false 829 } else { 830 if !c.consulAvailable { 831 c.logger.Printf("[DEBUG] consul.syncer: syncs succesful") 832 } 833 c.consulAvailable = true 834 } 835 case <-c.notifySyncCh: 836 sync.Reset(0) 837 case <-c.shutdownCh: 838 c.Shutdown() 839 case <-c.notifyShutdownCh: 840 sync.Stop() 841 c.logger.Printf("[INFO] consul.syncer: shutting down syncer ") 842 return 843 } 844 } 845 } 846 847 // RunHandlers executes each handler (randomly) 848 func (c *Syncer) RunHandlers() error { 849 c.periodicLock.RLock() 850 handlers := make(map[string]types.PeriodicCallback, len(c.periodicCallbacks)) 851 for name, fn := range c.periodicCallbacks { 852 handlers[name] = fn 853 } 854 c.periodicLock.RUnlock() 855 856 var mErr multierror.Error 857 for _, fn := range handlers { 858 if err := fn(); err != nil { 859 mErr.Errors = append(mErr.Errors, err) 860 } 861 } 862 return mErr.ErrorOrNil() 863 } 864 865 // SyncServices sync the services with the Consul Agent 866 func (c *Syncer) SyncServices() error { 867 var mErr multierror.Error 868 if err := c.syncServices(); err != nil { 869 mErr.Errors = append(mErr.Errors, err) 870 } 871 if err := c.syncChecks(); err != nil { 872 mErr.Errors = append(mErr.Errors, err) 873 } 874 if err := c.RunHandlers(); err != nil { 875 return err 876 } 877 return mErr.ErrorOrNil() 878 } 879 880 // filterConsulServices prunes out all the service who were not registered with 881 // the syncer 882 func (c *Syncer) filterConsulServices(consulServices map[string]*consul.AgentService) map[consulServiceID]*consul.AgentService { 883 localServices := make(map[consulServiceID]*consul.AgentService, len(consulServices)) 884 c.groupsLock.RLock() 885 defer c.groupsLock.RUnlock() 886 for serviceID, service := range consulServices { 887 for domain := range c.servicesGroups { 888 if strings.HasPrefix(service.ID, fmt.Sprintf("%s-%s", nomadServicePrefix, domain)) { 889 localServices[consulServiceID(serviceID)] = service 890 break 891 } 892 } 893 } 894 return localServices 895 } 896 897 // filterConsulChecks prunes out all the consul checks which do not have 898 // services with Syncer's idPrefix. 899 func (c *Syncer) filterConsulChecks(consulChecks map[string]*consul.AgentCheck) map[consulCheckID]*consul.AgentCheck { 900 localChecks := make(map[consulCheckID]*consul.AgentCheck, len(consulChecks)) 901 c.groupsLock.RLock() 902 defer c.groupsLock.RUnlock() 903 for checkID, check := range consulChecks { 904 for domain := range c.checkGroups { 905 if strings.HasPrefix(check.ServiceID, fmt.Sprintf("%s-%s", nomadServicePrefix, domain)) { 906 localChecks[consulCheckID(checkID)] = check 907 break 908 } 909 } 910 } 911 return localChecks 912 } 913 914 // consulPresent indicates whether the Consul Agent is responding 915 func (c *Syncer) consulPresent() bool { 916 _, err := c.client.Agent().Self() 917 return err == nil 918 } 919 920 // runCheck runs a check and updates the corresponding ttl check in consul 921 func (c *Syncer) runCheck(check Check) { 922 res := check.Run() 923 if res.Duration >= check.Timeout() { 924 c.logger.Printf("[DEBUG] consul.syncer: check took time: %v, timeout: %v", res.Duration, check.Timeout()) 925 } 926 state := consul.HealthCritical 927 output := res.Output 928 switch res.ExitCode { 929 case 0: 930 state = consul.HealthPassing 931 case 1: 932 state = consul.HealthWarning 933 default: 934 state = consul.HealthCritical 935 } 936 if res.Err != nil { 937 state = consul.HealthCritical 938 output = res.Err.Error() 939 } 940 if err := c.client.Agent().UpdateTTL(check.ID(), output, state); err != nil { 941 if c.consulAvailable { 942 c.logger.Printf("[DEBUG] consul.syncer: check %+q failed, disabling Consul checks until until next successful sync: %v", check.ID(), err) 943 c.consulAvailable = false 944 } else { 945 c.consulAvailable = true 946 } 947 } 948 } 949 950 // ReapUnmatched prunes all services that do not exist in the passed domains 951 func (c *Syncer) ReapUnmatched(domains []ServiceDomain) error { 952 servicesInConsul, err := c.ConsulClient().Agent().Services() 953 if err != nil { 954 return err 955 } 956 957 var mErr multierror.Error 958 for serviceID := range servicesInConsul { 959 // Skip any service that was not registered by Nomad 960 if !strings.HasPrefix(serviceID, nomadServicePrefix) { 961 continue 962 } 963 964 // Filter services that do not exist in the desired domains 965 match := false 966 for _, domain := range domains { 967 // Include the hyphen so it is explicit to that domain otherwise it 968 // maybe a subset match 969 desired := fmt.Sprintf("%s-%s-", nomadServicePrefix, domain) 970 if strings.HasPrefix(serviceID, desired) { 971 match = true 972 break 973 } 974 } 975 976 if !match { 977 if err := c.deregisterService(serviceID); err != nil { 978 mErr.Errors = append(mErr.Errors, err) 979 } 980 } 981 } 982 983 return mErr.ErrorOrNil() 984 } 985 986 // AddPeriodicHandler adds a uniquely named callback. Returns true if 987 // successful, false if a handler with the same name already exists. 988 func (c *Syncer) AddPeriodicHandler(name string, fn types.PeriodicCallback) bool { 989 c.periodicLock.Lock() 990 defer c.periodicLock.Unlock() 991 if _, found := c.periodicCallbacks[name]; found { 992 c.logger.Printf("[ERROR] consul.syncer: failed adding handler %+q", name) 993 return false 994 } 995 c.periodicCallbacks[name] = fn 996 return true 997 } 998 999 // NumHandlers returns the number of callbacks registered with the syncer 1000 func (c *Syncer) NumHandlers() int { 1001 c.periodicLock.RLock() 1002 defer c.periodicLock.RUnlock() 1003 return len(c.periodicCallbacks) 1004 } 1005 1006 // RemovePeriodicHandler removes a handler with a given name. 1007 func (c *Syncer) RemovePeriodicHandler(name string) { 1008 c.periodicLock.Lock() 1009 defer c.periodicLock.Unlock() 1010 delete(c.periodicCallbacks, name) 1011 } 1012 1013 // ConsulClient returns the Consul client used by the Syncer. 1014 func (c *Syncer) ConsulClient() *consul.Client { 1015 return c.client 1016 }