github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/command/agent/consul/syncer.go (about) 1 // Package consul is used by Nomad to register all services both static services 2 // and dynamic via allocations. 3 // 4 // Consul Service IDs have the following format: ${nomadServicePrefix}-${groupName}-${serviceKey} 5 // groupName takes on one of the following values: 6 // - server 7 // - client 8 // - executor-${alloc-id}-${task-name} 9 // 10 // serviceKey should be generated by service registrators. 11 // If the serviceKey is being generated by the executor for a Nomad Task.Services 12 // the following helper should be used: 13 // NOTE: Executor should interpolate the service prior to calling 14 // func GenerateTaskServiceKey(service *structs.Service) string 15 // 16 // The Nomad Client reaps services registered from dead allocations that were 17 // not properly cleaned up by the executor (this is not the expected case). 18 // 19 // TODO fix this comment 20 // The Consul ServiceIDs generated by the executor will contain the allocation 21 // ID. Thus the client can generate the list of Consul ServiceIDs to keep by 22 // calling the following method on all running allocations the client is aware 23 // of: 24 // func GenerateExecutorServiceKeyPrefixFromAlloc(allocID string) string 25 package consul 26 27 import ( 28 "fmt" 29 "log" 30 "net" 31 "net/url" 32 "strconv" 33 "strings" 34 "sync" 35 "time" 36 37 consul "github.com/hashicorp/consul/api" 38 "github.com/hashicorp/consul/lib" 39 "github.com/hashicorp/go-multierror" 40 41 "github.com/hashicorp/nomad/nomad/structs" 42 "github.com/hashicorp/nomad/nomad/structs/config" 43 "github.com/hashicorp/nomad/nomad/types" 44 ) 45 46 const ( 47 // initialSyncBuffer is the max time an initial sync will sleep 48 // before syncing. 49 initialSyncBuffer = 30 * time.Second 50 51 // initialSyncDelay is the delay before an initial sync. 52 initialSyncDelay = 5 * time.Second 53 54 // nomadServicePrefix is the first prefix that scopes all Nomad registered 55 // services 56 nomadServicePrefix = "_nomad" 57 58 // The periodic time interval for syncing services and checks with Consul 59 syncInterval = 5 * time.Second 60 61 // syncJitter provides a little variance in the frequency at which 62 // Syncer polls Consul. 63 syncJitter = 8 64 65 // ttlCheckBuffer is the time interval that Nomad can take to report Consul 66 // the check result 67 ttlCheckBuffer = 31 * time.Second 68 69 // DefaultQueryWaitDuration is the max duration the Consul Agent will 70 // spend waiting for a response from a Consul Query. 71 DefaultQueryWaitDuration = 2 * time.Second 72 73 // ServiceTagHTTP is the tag assigned to HTTP services 74 ServiceTagHTTP = "http" 75 76 // ServiceTagRPC is the tag assigned to RPC services 77 ServiceTagRPC = "rpc" 78 79 // ServiceTagSerf is the tag assigned to Serf services 80 ServiceTagSerf = "serf" 81 ) 82 83 // consulServiceID and consulCheckID are the IDs registered with Consul 84 type consulServiceID string 85 type consulCheckID string 86 87 // ServiceKey is the generated service key that is used to build the Consul 88 // ServiceID 89 type ServiceKey string 90 91 // ServiceDomain is the domain of services registered by Nomad 92 type ServiceDomain string 93 94 const ( 95 ClientDomain ServiceDomain = "client" 96 ServerDomain ServiceDomain = "server" 97 ) 98 99 // NewExecutorDomain returns a domain specific to the alloc ID and task 100 func NewExecutorDomain(allocID, task string) ServiceDomain { 101 return ServiceDomain(fmt.Sprintf("executor-%s-%s", allocID, task)) 102 } 103 104 // Syncer allows syncing of services and checks with Consul 105 type Syncer struct { 106 client *consul.Client 107 consulAvailable bool 108 109 // servicesGroups and checkGroups are named groups of services and checks 110 // respectively that will be flattened and reconciled with Consul when 111 // SyncServices() is called. The key to the servicesGroups map is unique 112 // per handler and is used to allow the Agent's services to be maintained 113 // independently of the Client or Server's services. 114 servicesGroups map[ServiceDomain]map[ServiceKey]*consul.AgentServiceRegistration 115 checkGroups map[ServiceDomain]map[ServiceKey][]*consul.AgentCheckRegistration 116 groupsLock sync.RWMutex 117 118 // The "Consul Registry" is a collection of Consul Services and 119 // Checks all guarded by the registryLock. 120 registryLock sync.RWMutex 121 122 // checkRunners are delegated Consul checks being ran by the Syncer 123 checkRunners map[consulCheckID]*CheckRunner 124 125 addrFinder func(portLabel string) (string, int) 126 createDelegatedCheck func(*structs.ServiceCheck, string) (Check, error) 127 delegateChecks map[string]struct{} // delegateChecks are the checks that the Nomad client runs and reports to Consul 128 // End registryLock guarded attributes. 129 130 logger *log.Logger 131 132 shutdownCh chan struct{} 133 shutdown bool 134 shutdownLock sync.Mutex 135 136 // notifyShutdownCh is used to notify a Syncer it needs to shutdown. 137 // This can happen because there was an explicit call to the Syncer's 138 // Shutdown() method, or because the calling task signaled the 139 // program is going to exit by closing its shutdownCh. 140 notifyShutdownCh chan struct{} 141 142 // periodicCallbacks is walked sequentially when the timer in Run 143 // fires. 144 periodicCallbacks map[string]types.PeriodicCallback 145 notifySyncCh chan struct{} 146 periodicLock sync.RWMutex 147 } 148 149 // NewSyncer returns a new consul.Syncer 150 func NewSyncer(consulConfig *config.ConsulConfig, shutdownCh chan struct{}, logger *log.Logger) (*Syncer, error) { 151 var consulClientConfig *consul.Config 152 var err error 153 consulClientConfig, err = consulConfig.ApiConfig() 154 if err != nil { 155 return nil, err 156 } 157 158 var consulClient *consul.Client 159 if consulClient, err = consul.NewClient(consulClientConfig); err != nil { 160 return nil, err 161 } 162 consulSyncer := Syncer{ 163 client: consulClient, 164 logger: logger, 165 consulAvailable: true, 166 shutdownCh: shutdownCh, 167 servicesGroups: make(map[ServiceDomain]map[ServiceKey]*consul.AgentServiceRegistration), 168 checkGroups: make(map[ServiceDomain]map[ServiceKey][]*consul.AgentCheckRegistration), 169 checkRunners: make(map[consulCheckID]*CheckRunner), 170 periodicCallbacks: make(map[string]types.PeriodicCallback), 171 // default noop implementation of addrFinder 172 addrFinder: func(string) (string, int) { return "", 0 }, 173 } 174 175 return &consulSyncer, nil 176 } 177 178 // SetDelegatedChecks sets the checks that nomad is going to run and report the 179 // result back to consul 180 func (c *Syncer) SetDelegatedChecks(delegateChecks map[string]struct{}, createDelegatedCheckFn func(*structs.ServiceCheck, string) (Check, error)) *Syncer { 181 c.delegateChecks = delegateChecks 182 c.createDelegatedCheck = createDelegatedCheckFn 183 return c 184 } 185 186 // SetAddrFinder sets a function to find the host and port for a Service given its port label 187 func (c *Syncer) SetAddrFinder(addrFinder func(string) (string, int)) *Syncer { 188 c.addrFinder = addrFinder 189 return c 190 } 191 192 // GenerateServiceKey should be called to generate a serviceKey based on the 193 // Service. 194 func GenerateServiceKey(service *structs.Service) ServiceKey { 195 var key string 196 numTags := len(service.Tags) 197 switch numTags { 198 case 0: 199 key = fmt.Sprintf("%s", service.Name) 200 default: 201 tags := strings.Join(service.Tags, "-") 202 key = fmt.Sprintf("%s-%s", service.Name, tags) 203 } 204 return ServiceKey(key) 205 } 206 207 // SetServices stores the map of Nomad Services to the provided service 208 // domain name. 209 func (c *Syncer) SetServices(domain ServiceDomain, services map[ServiceKey]*structs.Service) error { 210 var mErr multierror.Error 211 numServ := len(services) 212 registeredServices := make(map[ServiceKey]*consul.AgentServiceRegistration, numServ) 213 registeredChecks := make(map[ServiceKey][]*consul.AgentCheckRegistration, numServ) 214 for serviceKey, service := range services { 215 serviceReg, err := c.createService(service, domain, serviceKey) 216 if err != nil { 217 mErr.Errors = append(mErr.Errors, err) 218 continue 219 } 220 registeredServices[serviceKey] = serviceReg 221 222 // Register the check(s) for this service 223 for _, chk := range service.Checks { 224 // Create a Consul check registration 225 chkReg, err := c.createCheckReg(chk, serviceReg) 226 if err != nil { 227 mErr.Errors = append(mErr.Errors, err) 228 continue 229 } 230 231 // creating a nomad check if we have to handle this particular check type 232 c.registryLock.RLock() 233 if _, ok := c.delegateChecks[chk.Type]; ok { 234 _, ok := c.checkRunners[consulCheckID(chkReg.ID)] 235 c.registryLock.RUnlock() 236 if ok { 237 continue 238 } 239 240 nc, err := c.createDelegatedCheck(chk, chkReg.ID) 241 if err != nil { 242 mErr.Errors = append(mErr.Errors, err) 243 continue 244 } 245 246 cr := NewCheckRunner(nc, c.runCheck, c.logger) 247 c.registryLock.Lock() 248 // TODO type the CheckRunner 249 c.checkRunners[consulCheckID(nc.ID())] = cr 250 c.registryLock.Unlock() 251 } else { 252 c.registryLock.RUnlock() 253 } 254 255 registeredChecks[serviceKey] = append(registeredChecks[serviceKey], chkReg) 256 } 257 } 258 259 if len(mErr.Errors) > 0 { 260 return mErr.ErrorOrNil() 261 } 262 263 // Update the services and checks groups for this domain 264 c.groupsLock.Lock() 265 266 // Create map for service group if it doesn't exist 267 serviceKeys, ok := c.servicesGroups[domain] 268 if !ok { 269 serviceKeys = make(map[ServiceKey]*consul.AgentServiceRegistration, len(registeredServices)) 270 c.servicesGroups[domain] = serviceKeys 271 } 272 273 // Remove stale services 274 for existingServiceKey := range serviceKeys { 275 if _, ok := registeredServices[existingServiceKey]; !ok { 276 // Exisitng service needs to be removed 277 delete(serviceKeys, existingServiceKey) 278 } 279 } 280 281 // Add registered services 282 for serviceKey, service := range registeredServices { 283 serviceKeys[serviceKey] = service 284 } 285 286 // Create map for check group if it doesn't exist 287 checkKeys, ok := c.checkGroups[domain] 288 if !ok { 289 checkKeys = make(map[ServiceKey][]*consul.AgentCheckRegistration, len(registeredChecks)) 290 c.checkGroups[domain] = checkKeys 291 } 292 293 // Remove stale checks 294 for existingCheckKey := range checkKeys { 295 if _, ok := registeredChecks[existingCheckKey]; !ok { 296 // Exisitng check needs to be removed 297 delete(checkKeys, existingCheckKey) 298 } 299 } 300 301 // Add registered checks 302 for checkKey, checks := range registeredChecks { 303 checkKeys[checkKey] = checks 304 } 305 c.groupsLock.Unlock() 306 307 // Sync immediately 308 c.SyncNow() 309 310 return nil 311 } 312 313 // SyncNow expires the current timer forcing the list of periodic callbacks 314 // to be synced immediately. 315 func (c *Syncer) SyncNow() { 316 select { 317 case c.notifySyncCh <- struct{}{}: 318 default: 319 } 320 } 321 322 // flattenedServices returns a flattened list of services that are registered 323 // locally 324 func (c *Syncer) flattenedServices() []*consul.AgentServiceRegistration { 325 const initialNumServices = 8 326 services := make([]*consul.AgentServiceRegistration, 0, initialNumServices) 327 c.groupsLock.RLock() 328 defer c.groupsLock.RUnlock() 329 for _, servicesGroup := range c.servicesGroups { 330 for _, service := range servicesGroup { 331 services = append(services, service) 332 } 333 } 334 return services 335 } 336 337 // flattenedChecks returns a flattened list of checks that are registered 338 // locally 339 func (c *Syncer) flattenedChecks() []*consul.AgentCheckRegistration { 340 const initialNumChecks = 8 341 checks := make([]*consul.AgentCheckRegistration, 0, initialNumChecks) 342 c.groupsLock.RLock() 343 for _, checkGroup := range c.checkGroups { 344 for _, check := range checkGroup { 345 checks = append(checks, check...) 346 } 347 } 348 c.groupsLock.RUnlock() 349 return checks 350 } 351 352 func (c *Syncer) signalShutdown() { 353 select { 354 case c.notifyShutdownCh <- struct{}{}: 355 default: 356 } 357 } 358 359 // Shutdown de-registers the services and checks and shuts down periodic syncing 360 func (c *Syncer) Shutdown() error { 361 var mErr multierror.Error 362 363 c.shutdownLock.Lock() 364 if !c.shutdown { 365 c.shutdown = true 366 } 367 c.shutdownLock.Unlock() 368 369 c.signalShutdown() 370 371 // Stop all the checks that nomad is running 372 c.registryLock.RLock() 373 defer c.registryLock.RUnlock() 374 for _, cr := range c.checkRunners { 375 cr.Stop() 376 } 377 378 // De-register all the services registered by this syncer from Consul 379 services, err := c.queryAgentServices() 380 if err != nil { 381 mErr.Errors = append(mErr.Errors, err) 382 } 383 for serviceID := range services { 384 convertedID := string(serviceID) 385 if err := c.client.Agent().ServiceDeregister(convertedID); err != nil { 386 c.logger.Printf("[WARN] consul.syncer: failed to deregister service ID %+q: %v", convertedID, err) 387 mErr.Errors = append(mErr.Errors, err) 388 } 389 } 390 return mErr.ErrorOrNil() 391 } 392 393 // queryChecks queries the Consul Agent for a list of Consul checks that 394 // have been registered with this Consul Syncer. 395 func (c *Syncer) queryChecks() (map[consulCheckID]*consul.AgentCheck, error) { 396 checks, err := c.client.Agent().Checks() 397 if err != nil { 398 return nil, err 399 } 400 return c.filterConsulChecks(checks), nil 401 } 402 403 // queryAgentServices queries the Consul Agent for a list of Consul services that 404 // have been registered with this Consul Syncer. 405 func (c *Syncer) queryAgentServices() (map[consulServiceID]*consul.AgentService, error) { 406 services, err := c.client.Agent().Services() 407 if err != nil { 408 return nil, err 409 } 410 return c.filterConsulServices(services), nil 411 } 412 413 // syncChecks synchronizes this Syncer's Consul Checks with the Consul Agent. 414 func (c *Syncer) syncChecks() error { 415 var mErr multierror.Error 416 consulChecks, err := c.queryChecks() 417 if err != nil { 418 return err 419 } 420 421 // Synchronize checks with Consul 422 missingChecks, existingChecks, changedChecks, staleChecks := c.calcChecksDiff(consulChecks) 423 for _, check := range missingChecks { 424 if err := c.registerCheck(check); err != nil { 425 mErr.Errors = append(mErr.Errors, err) 426 } 427 } 428 for _, check := range existingChecks { 429 c.ensureCheckRunning(check) 430 } 431 for _, check := range changedChecks { 432 // NOTE(sean@): Do we need to deregister the check before 433 // re-registering it? Not deregistering to avoid missing the 434 // TTL but doesn't correct reconcile any possible drift with 435 // the check. 436 // 437 // if err := c.deregisterCheck(check.ID); err != nil { 438 // mErr.Errors = append(mErr.Errors, err) 439 // } 440 if err := c.registerCheck(check); err != nil { 441 mErr.Errors = append(mErr.Errors, err) 442 } 443 } 444 for _, check := range staleChecks { 445 if err := c.deregisterCheck(consulCheckID(check.ID)); err != nil { 446 mErr.Errors = append(mErr.Errors, err) 447 } 448 } 449 return mErr.ErrorOrNil() 450 } 451 452 // compareConsulCheck takes a consul.AgentCheckRegistration instance and 453 // compares it with a consul.AgentCheck. Returns true if they are equal 454 // according to consul.AgentCheck, otherwise false. 455 func compareConsulCheck(localCheck *consul.AgentCheckRegistration, consulCheck *consul.AgentCheck) bool { 456 if consulCheck.CheckID != localCheck.ID || 457 consulCheck.Name != localCheck.Name || 458 consulCheck.Notes != localCheck.Notes || 459 consulCheck.ServiceID != localCheck.ServiceID { 460 return false 461 } 462 return true 463 } 464 465 // calcChecksDiff takes the argument (consulChecks) and calculates the delta 466 // between the consul.Syncer's list of known checks (c.flattenedChecks()). 467 // Four arrays are returned: 468 // 469 // 1) a slice of checks that exist only locally in the Syncer and are missing 470 // from the Consul Agent (consulChecks) and therefore need to be registered. 471 // 472 // 2) a slice of checks that exist in both the local consul.Syncer's 473 // tracked list and Consul Agent (consulChecks). 474 // 475 // 3) a slice of checks that exist in both the local consul.Syncer's 476 // tracked list and Consul Agent (consulServices) but have diverged state. 477 // 478 // 4) a slice of checks that exist only in the Consul Agent (consulChecks) 479 // and should be removed because the Consul Agent has drifted from the 480 // Syncer. 481 func (c *Syncer) calcChecksDiff(consulChecks map[consulCheckID]*consul.AgentCheck) ( 482 missingChecks []*consul.AgentCheckRegistration, 483 equalChecks []*consul.AgentCheckRegistration, 484 changedChecks []*consul.AgentCheckRegistration, 485 staleChecks []*consul.AgentCheckRegistration) { 486 487 type mergedCheck struct { 488 check *consul.AgentCheckRegistration 489 // 'l' == Nomad local only 490 // 'e' == equal 491 // 'c' == changed 492 // 'a' == Consul agent only 493 state byte 494 } 495 var ( 496 localChecksCount = 0 497 equalChecksCount = 0 498 changedChecksCount = 0 499 agentChecks = 0 500 ) 501 flattenedChecks := c.flattenedChecks() 502 localChecks := make(map[string]*mergedCheck, len(flattenedChecks)+len(consulChecks)) 503 for _, localCheck := range flattenedChecks { 504 localChecksCount++ 505 localChecks[localCheck.ID] = &mergedCheck{localCheck, 'l'} 506 } 507 for _, consulCheck := range consulChecks { 508 if localCheck, found := localChecks[consulCheck.CheckID]; found { 509 localChecksCount-- 510 if compareConsulCheck(localCheck.check, consulCheck) { 511 equalChecksCount++ 512 localChecks[consulCheck.CheckID].state = 'e' 513 } else { 514 changedChecksCount++ 515 localChecks[consulCheck.CheckID].state = 'c' 516 } 517 } else { 518 agentChecks++ 519 agentCheckReg := &consul.AgentCheckRegistration{ 520 ID: consulCheck.CheckID, 521 Name: consulCheck.Name, 522 Notes: consulCheck.Notes, 523 ServiceID: consulCheck.ServiceID, 524 } 525 localChecks[consulCheck.CheckID] = &mergedCheck{agentCheckReg, 'a'} 526 } 527 } 528 529 missingChecks = make([]*consul.AgentCheckRegistration, 0, localChecksCount) 530 equalChecks = make([]*consul.AgentCheckRegistration, 0, equalChecksCount) 531 changedChecks = make([]*consul.AgentCheckRegistration, 0, changedChecksCount) 532 staleChecks = make([]*consul.AgentCheckRegistration, 0, agentChecks) 533 for _, check := range localChecks { 534 switch check.state { 535 case 'l': 536 missingChecks = append(missingChecks, check.check) 537 case 'e': 538 equalChecks = append(equalChecks, check.check) 539 case 'c': 540 changedChecks = append(changedChecks, check.check) 541 case 'a': 542 staleChecks = append(staleChecks, check.check) 543 } 544 } 545 546 return missingChecks, equalChecks, changedChecks, staleChecks 547 } 548 549 // compareConsulService takes a consul.AgentServiceRegistration instance and 550 // compares it with a consul.AgentService. Returns true if they are equal 551 // according to consul.AgentService, otherwise false. 552 func compareConsulService(localService *consul.AgentServiceRegistration, consulService *consul.AgentService) bool { 553 if consulService.ID != localService.ID || 554 consulService.Service != localService.Name || 555 consulService.Port != localService.Port || 556 consulService.Address != localService.Address || 557 consulService.EnableTagOverride != localService.EnableTagOverride { 558 return false 559 } 560 561 serviceTags := make(map[string]byte, len(localService.Tags)) 562 for _, tag := range localService.Tags { 563 serviceTags[tag] = 'l' 564 } 565 for _, tag := range consulService.Tags { 566 if _, found := serviceTags[tag]; !found { 567 return false 568 } 569 serviceTags[tag] = 'b' 570 } 571 for _, state := range serviceTags { 572 if state == 'l' { 573 return false 574 } 575 } 576 577 return true 578 } 579 580 // calcServicesDiff takes the argument (consulServices) and calculates the 581 // delta between the consul.Syncer's list of known services 582 // (c.flattenedServices()). Four arrays are returned: 583 // 584 // 1) a slice of services that exist only locally in the Syncer and are 585 // missing from the Consul Agent (consulServices) and therefore need to be 586 // registered. 587 // 588 // 2) a slice of services that exist in both the local consul.Syncer's 589 // tracked list and Consul Agent (consulServices) *AND* are identical. 590 // 591 // 3) a slice of services that exist in both the local consul.Syncer's 592 // tracked list and Consul Agent (consulServices) but have diverged state. 593 // 594 // 4) a slice of services that exist only in the Consul Agent 595 // (consulServices) and should be removed because the Consul Agent has 596 // drifted from the Syncer. 597 func (c *Syncer) calcServicesDiff(consulServices map[consulServiceID]*consul.AgentService) (missingServices []*consul.AgentServiceRegistration, equalServices []*consul.AgentServiceRegistration, changedServices []*consul.AgentServiceRegistration, staleServices []*consul.AgentServiceRegistration) { 598 type mergedService struct { 599 service *consul.AgentServiceRegistration 600 // 'l' == Nomad local only 601 // 'e' == equal 602 // 'c' == changed 603 // 'a' == Consul agent only 604 state byte 605 } 606 var ( 607 localServicesCount = 0 608 equalServicesCount = 0 609 changedServicesCount = 0 610 agentServices = 0 611 ) 612 flattenedServices := c.flattenedServices() 613 localServices := make(map[string]*mergedService, len(flattenedServices)+len(consulServices)) 614 for _, localService := range flattenedServices { 615 localServicesCount++ 616 localServices[localService.ID] = &mergedService{localService, 'l'} 617 } 618 for _, consulService := range consulServices { 619 if localService, found := localServices[consulService.ID]; found { 620 localServicesCount-- 621 if compareConsulService(localService.service, consulService) { 622 equalServicesCount++ 623 localServices[consulService.ID].state = 'e' 624 } else { 625 changedServicesCount++ 626 localServices[consulService.ID].state = 'c' 627 } 628 } else { 629 agentServices++ 630 agentServiceReg := &consul.AgentServiceRegistration{ 631 ID: consulService.ID, 632 Name: consulService.Service, 633 Tags: consulService.Tags, 634 Port: consulService.Port, 635 Address: consulService.Address, 636 } 637 localServices[consulService.ID] = &mergedService{agentServiceReg, 'a'} 638 } 639 } 640 641 missingServices = make([]*consul.AgentServiceRegistration, 0, localServicesCount) 642 equalServices = make([]*consul.AgentServiceRegistration, 0, equalServicesCount) 643 changedServices = make([]*consul.AgentServiceRegistration, 0, changedServicesCount) 644 staleServices = make([]*consul.AgentServiceRegistration, 0, agentServices) 645 for _, service := range localServices { 646 switch service.state { 647 case 'l': 648 missingServices = append(missingServices, service.service) 649 case 'e': 650 equalServices = append(equalServices, service.service) 651 case 'c': 652 changedServices = append(changedServices, service.service) 653 case 'a': 654 staleServices = append(staleServices, service.service) 655 } 656 } 657 658 return missingServices, equalServices, changedServices, staleServices 659 } 660 661 // syncServices synchronizes this Syncer's Consul Services with the Consul 662 // Agent. 663 func (c *Syncer) syncServices() error { 664 consulServices, err := c.queryAgentServices() 665 if err != nil { 666 return err 667 } 668 669 // Synchronize services with Consul 670 var mErr multierror.Error 671 missingServices, _, changedServices, removedServices := c.calcServicesDiff(consulServices) 672 for _, service := range missingServices { 673 if err := c.client.Agent().ServiceRegister(service); err != nil { 674 mErr.Errors = append(mErr.Errors, err) 675 } 676 } 677 for _, service := range changedServices { 678 // Re-register the local service 679 if err := c.client.Agent().ServiceRegister(service); err != nil { 680 mErr.Errors = append(mErr.Errors, err) 681 } 682 } 683 for _, service := range removedServices { 684 if err := c.deregisterService(service.ID); err != nil { 685 mErr.Errors = append(mErr.Errors, err) 686 } 687 } 688 return mErr.ErrorOrNil() 689 } 690 691 // registerCheck registers a check definition with Consul 692 func (c *Syncer) registerCheck(chkReg *consul.AgentCheckRegistration) error { 693 c.registryLock.RLock() 694 if cr, ok := c.checkRunners[consulCheckID(chkReg.ID)]; ok { 695 cr.Start() 696 } 697 c.registryLock.RUnlock() 698 return c.client.Agent().CheckRegister(chkReg) 699 } 700 701 // ensureCheckRunning starts the check runner for a check if it's not already running 702 func (c *Syncer) ensureCheckRunning(chk *consul.AgentCheckRegistration) { 703 c.registryLock.RLock() 704 defer c.registryLock.RUnlock() 705 if cr, ok := c.checkRunners[consulCheckID(chk.ID)]; ok && !cr.Started() { 706 c.logger.Printf("[DEBUG] consul.syncer: starting runner for existing check. %v", chk.ID) 707 cr.Start() 708 } 709 } 710 711 // createCheckReg creates a Check that can be registered with Nomad. It also 712 // creates a Nomad check for the check types that it can handle. 713 func (c *Syncer) createCheckReg(check *structs.ServiceCheck, serviceReg *consul.AgentServiceRegistration) (*consul.AgentCheckRegistration, error) { 714 chkReg := consul.AgentCheckRegistration{ 715 ID: check.Hash(serviceReg.ID), 716 Name: check.Name, 717 ServiceID: serviceReg.ID, 718 } 719 chkReg.Timeout = check.Timeout.String() 720 chkReg.Interval = check.Interval.String() 721 host, port := serviceReg.Address, serviceReg.Port 722 if check.PortLabel != "" { 723 host, port = c.addrFinder(check.PortLabel) 724 } 725 switch check.Type { 726 case structs.ServiceCheckHTTP: 727 if check.Protocol == "" { 728 check.Protocol = "http" 729 } 730 base := url.URL{ 731 Scheme: check.Protocol, 732 Host: net.JoinHostPort(host, strconv.Itoa(port)), 733 } 734 relative, err := url.Parse(check.Path) 735 if err != nil { 736 return nil, err 737 } 738 url := base.ResolveReference(relative) 739 chkReg.HTTP = url.String() 740 case structs.ServiceCheckTCP: 741 chkReg.TCP = net.JoinHostPort(host, strconv.Itoa(port)) 742 case structs.ServiceCheckScript: 743 chkReg.TTL = (check.Interval + ttlCheckBuffer).String() 744 default: 745 return nil, fmt.Errorf("check type %+q not valid", check.Type) 746 } 747 chkReg.Status = check.InitialStatus 748 return &chkReg, nil 749 } 750 751 // generateConsulServiceID takes the domain and service key and returns a Consul 752 // ServiceID 753 func generateConsulServiceID(domain ServiceDomain, key ServiceKey) consulServiceID { 754 return consulServiceID(fmt.Sprintf("%s-%s-%s", nomadServicePrefix, domain, key)) 755 } 756 757 // createService creates a Consul AgentService from a Nomad ConsulService. 758 func (c *Syncer) createService(service *structs.Service, domain ServiceDomain, key ServiceKey) (*consul.AgentServiceRegistration, error) { 759 c.registryLock.RLock() 760 defer c.registryLock.RUnlock() 761 762 srv := consul.AgentServiceRegistration{ 763 ID: string(generateConsulServiceID(domain, key)), 764 Name: service.Name, 765 Tags: service.Tags, 766 } 767 host, port := c.addrFinder(service.PortLabel) 768 if host != "" { 769 srv.Address = host 770 } 771 772 if port != 0 { 773 srv.Port = port 774 } 775 776 return &srv, nil 777 } 778 779 // deregisterService de-registers a service with the given ID from consul 780 func (c *Syncer) deregisterService(serviceID string) error { 781 return c.client.Agent().ServiceDeregister(serviceID) 782 } 783 784 // deregisterCheck de-registers a check from Consul 785 func (c *Syncer) deregisterCheck(id consulCheckID) error { 786 c.registryLock.Lock() 787 defer c.registryLock.Unlock() 788 789 // Deleting from Consul Agent 790 if err := c.client.Agent().CheckDeregister(string(id)); err != nil { 791 // CheckDeregister() will be reattempted again in a future 792 // sync. 793 return err 794 } 795 796 // Remove the check from the local registry 797 if cr, ok := c.checkRunners[id]; ok { 798 cr.Stop() 799 delete(c.checkRunners, id) 800 } 801 802 return nil 803 } 804 805 // Run triggers periodic syncing of services and checks with Consul. This is 806 // a long lived go-routine which is stopped during shutdown. 807 func (c *Syncer) Run() { 808 sync := time.NewTimer(0) 809 for { 810 select { 811 case <-sync.C: 812 d := syncInterval - lib.RandomStagger(syncInterval/syncJitter) 813 sync.Reset(d) 814 815 if err := c.SyncServices(); err != nil { 816 if c.consulAvailable { 817 c.logger.Printf("[DEBUG] consul.syncer: error in syncing: %v", err) 818 } 819 c.consulAvailable = false 820 } else { 821 if !c.consulAvailable { 822 c.logger.Printf("[DEBUG] consul.syncer: syncs succesful") 823 } 824 c.consulAvailable = true 825 } 826 case <-c.notifySyncCh: 827 sync.Reset(syncInterval) 828 case <-c.shutdownCh: 829 c.Shutdown() 830 case <-c.notifyShutdownCh: 831 sync.Stop() 832 c.logger.Printf("[INFO] consul.syncer: shutting down syncer ") 833 return 834 } 835 } 836 } 837 838 // RunHandlers executes each handler (randomly) 839 func (c *Syncer) RunHandlers() error { 840 c.periodicLock.RLock() 841 handlers := make(map[string]types.PeriodicCallback, len(c.periodicCallbacks)) 842 for name, fn := range c.periodicCallbacks { 843 handlers[name] = fn 844 } 845 c.periodicLock.RUnlock() 846 847 var mErr multierror.Error 848 for _, fn := range handlers { 849 if err := fn(); err != nil { 850 mErr.Errors = append(mErr.Errors, err) 851 } 852 } 853 return mErr.ErrorOrNil() 854 } 855 856 // SyncServices sync the services with the Consul Agent 857 func (c *Syncer) SyncServices() error { 858 var mErr multierror.Error 859 if err := c.syncServices(); err != nil { 860 mErr.Errors = append(mErr.Errors, err) 861 } 862 if err := c.syncChecks(); err != nil { 863 mErr.Errors = append(mErr.Errors, err) 864 } 865 if err := c.RunHandlers(); err != nil { 866 return err 867 } 868 return mErr.ErrorOrNil() 869 } 870 871 // filterConsulServices prunes out all the service who were not registered with 872 // the syncer 873 func (c *Syncer) filterConsulServices(consulServices map[string]*consul.AgentService) map[consulServiceID]*consul.AgentService { 874 localServices := make(map[consulServiceID]*consul.AgentService, len(consulServices)) 875 c.registryLock.RLock() 876 defer c.registryLock.RUnlock() 877 for serviceID, service := range consulServices { 878 for domain := range c.servicesGroups { 879 if strings.HasPrefix(service.ID, fmt.Sprintf("%s-%s", nomadServicePrefix, domain)) { 880 localServices[consulServiceID(serviceID)] = service 881 break 882 } 883 } 884 } 885 return localServices 886 } 887 888 // filterConsulChecks prunes out all the consul checks which do not have 889 // services with Syncer's idPrefix. 890 func (c *Syncer) filterConsulChecks(consulChecks map[string]*consul.AgentCheck) map[consulCheckID]*consul.AgentCheck { 891 localChecks := make(map[consulCheckID]*consul.AgentCheck, len(consulChecks)) 892 c.registryLock.RLock() 893 defer c.registryLock.RUnlock() 894 for checkID, check := range consulChecks { 895 for domain := range c.checkGroups { 896 if strings.HasPrefix(check.ServiceID, fmt.Sprintf("%s-%s", nomadServicePrefix, domain)) { 897 localChecks[consulCheckID(checkID)] = check 898 break 899 } 900 } 901 } 902 return localChecks 903 } 904 905 // consulPresent indicates whether the Consul Agent is responding 906 func (c *Syncer) consulPresent() bool { 907 _, err := c.client.Agent().Self() 908 return err == nil 909 } 910 911 // runCheck runs a check and updates the corresponding ttl check in consul 912 func (c *Syncer) runCheck(check Check) { 913 res := check.Run() 914 if res.Duration >= check.Timeout() { 915 c.logger.Printf("[DEBUG] consul.syncer: check took time: %v, timeout: %v", res.Duration, check.Timeout()) 916 } 917 state := consul.HealthCritical 918 output := res.Output 919 switch res.ExitCode { 920 case 0: 921 state = consul.HealthPassing 922 case 1: 923 state = consul.HealthWarning 924 default: 925 state = consul.HealthCritical 926 } 927 if res.Err != nil { 928 state = consul.HealthCritical 929 output = res.Err.Error() 930 } 931 if err := c.client.Agent().UpdateTTL(check.ID(), output, state); err != nil { 932 if c.consulAvailable { 933 c.logger.Printf("[DEBUG] consul.syncer: check %+q failed, disabling Consul checks until until next successful sync: %v", check.ID(), err) 934 c.consulAvailable = false 935 } else { 936 c.consulAvailable = true 937 } 938 } 939 } 940 941 // ReapUnmatched prunes all services that do not exist in the passed domains 942 func (c *Syncer) ReapUnmatched(domains []ServiceDomain) error { 943 servicesInConsul, err := c.ConsulClient().Agent().Services() 944 if err != nil { 945 return err 946 } 947 948 var mErr multierror.Error 949 for serviceID := range servicesInConsul { 950 // Skip any service that was not registered by Nomad 951 if !strings.HasPrefix(serviceID, nomadServicePrefix) { 952 continue 953 } 954 955 // Filter services that do not exist in the desired domains 956 match := false 957 for _, domain := range domains { 958 // Include the hyphen so it is explicit to that domain otherwise it 959 // maybe a subset match 960 desired := fmt.Sprintf("%s-%s-", nomadServicePrefix, domain) 961 if strings.HasPrefix(serviceID, desired) { 962 match = true 963 break 964 } 965 } 966 967 if !match { 968 if err := c.deregisterService(serviceID); err != nil { 969 mErr.Errors = append(mErr.Errors, err) 970 } 971 } 972 } 973 974 return mErr.ErrorOrNil() 975 } 976 977 // AddPeriodicHandler adds a uniquely named callback. Returns true if 978 // successful, false if a handler with the same name already exists. 979 func (c *Syncer) AddPeriodicHandler(name string, fn types.PeriodicCallback) bool { 980 c.periodicLock.Lock() 981 defer c.periodicLock.Unlock() 982 if _, found := c.periodicCallbacks[name]; found { 983 c.logger.Printf("[ERROR] consul.syncer: failed adding handler %+q", name) 984 return false 985 } 986 c.periodicCallbacks[name] = fn 987 return true 988 } 989 990 // NumHandlers returns the number of callbacks registered with the syncer 991 func (c *Syncer) NumHandlers() int { 992 c.periodicLock.RLock() 993 defer c.periodicLock.RUnlock() 994 return len(c.periodicCallbacks) 995 } 996 997 // RemovePeriodicHandler removes a handler with a given name. 998 func (c *Syncer) RemovePeriodicHandler(name string) { 999 c.periodicLock.Lock() 1000 defer c.periodicLock.Unlock() 1001 delete(c.periodicCallbacks, name) 1002 } 1003 1004 // ConsulClient returns the Consul client used by the Syncer. 1005 func (c *Syncer) ConsulClient() *consul.Client { 1006 return c.client 1007 }