github.com/dkerwin/nomad@v0.3.3-0.20160525181927-74554135514b/client/consul/sync.go (about) 1 package consul 2 3 import ( 4 "crypto/tls" 5 "fmt" 6 "log" 7 "net/http" 8 "net/url" 9 "reflect" 10 "strings" 11 "sync" 12 "time" 13 14 consul "github.com/hashicorp/consul/api" 15 "github.com/hashicorp/go-multierror" 16 17 "github.com/hashicorp/nomad/nomad/structs" 18 ) 19 20 // ConsulService allows syncing of services and checks with Consul 21 type ConsulService struct { 22 client *consul.Client 23 availble bool 24 25 serviceIdentifier string // serviceIdentifier is a token which identifies which task/alloc the service belongs to 26 delegateChecks map[string]struct{} // delegateChecks are the checks that the Nomad client runs and reports to Consul 27 createCheck func(*structs.ServiceCheck, string) (Check, error) 28 addrFinder func(portLabel string) (string, int) 29 30 trackedServices map[string]*consul.AgentService 31 trackedChecks map[string]*consul.AgentCheckRegistration 32 checkRunners map[string]*CheckRunner 33 34 logger *log.Logger 35 36 shutdownCh chan struct{} 37 shutdown bool 38 shutdownLock sync.Mutex 39 } 40 41 // ConsulConfig is the configuration used to create a new ConsulService client 42 type ConsulConfig struct { 43 Addr string 44 Token string 45 Auth string 46 EnableSSL bool 47 VerifySSL bool 48 CAFile string 49 CertFile string 50 KeyFile string 51 } 52 53 const ( 54 // The periodic time interval for syncing services and checks with Consul 55 syncInterval = 5 * time.Second 56 57 // ttlCheckBuffer is the time interval that Nomad can take to report Consul 58 // the check result 59 ttlCheckBuffer = 31 * time.Second 60 ) 61 62 // NewConsulService returns a new ConsulService 63 func NewConsulService(config *ConsulConfig, logger *log.Logger) (*ConsulService, error) { 64 var err error 65 var c *consul.Client 66 cfg := consul.DefaultConfig() 67 if config.Addr != "" { 68 cfg.Address = config.Addr 69 } 70 if config.Token != "" { 71 cfg.Token = config.Token 72 } 73 if config.Auth != "" { 74 var username, password string 75 if strings.Contains(config.Auth, ":") { 76 split := strings.SplitN(config.Auth, ":", 2) 77 username = split[0] 78 password = split[1] 79 } else { 80 username = config.Auth 81 } 82 83 cfg.HttpAuth = &consul.HttpBasicAuth{ 84 Username: username, 85 Password: password, 86 } 87 } 88 if config.EnableSSL { 89 cfg.Scheme = "https" 90 tlsCfg := consul.TLSConfig{ 91 Address: cfg.Address, 92 CAFile: config.CAFile, 93 CertFile: config.CertFile, 94 KeyFile: config.KeyFile, 95 InsecureSkipVerify: !config.VerifySSL, 96 } 97 tlsClientCfg, err := consul.SetupTLSConfig(&tlsCfg) 98 if err != nil { 99 return nil, fmt.Errorf("error creating tls client config for consul: %v", err) 100 } 101 cfg.HttpClient.Transport = &http.Transport{ 102 TLSClientConfig: tlsClientCfg, 103 } 104 } 105 if config.EnableSSL && !config.VerifySSL { 106 cfg.HttpClient.Transport = &http.Transport{ 107 TLSClientConfig: &tls.Config{ 108 InsecureSkipVerify: true, 109 }, 110 } 111 } 112 if c, err = consul.NewClient(cfg); err != nil { 113 return nil, err 114 } 115 consulService := ConsulService{ 116 client: c, 117 logger: logger, 118 trackedServices: make(map[string]*consul.AgentService), 119 trackedChecks: make(map[string]*consul.AgentCheckRegistration), 120 checkRunners: make(map[string]*CheckRunner), 121 122 shutdownCh: make(chan struct{}), 123 } 124 return &consulService, nil 125 } 126 127 // SetDelegatedChecks sets the checks that nomad is going to run and report the 128 // result back to consul 129 func (c *ConsulService) SetDelegatedChecks(delegateChecks map[string]struct{}, createCheck func(*structs.ServiceCheck, string) (Check, error)) *ConsulService { 130 c.delegateChecks = delegateChecks 131 c.createCheck = createCheck 132 return c 133 } 134 135 // SetAddrFinder sets a function to find the host and port for a Service given its port label 136 func (c *ConsulService) SetAddrFinder(addrFinder func(string) (string, int)) *ConsulService { 137 c.addrFinder = addrFinder 138 return c 139 } 140 141 // SetServiceIdentifier sets the identifier of the services we are syncing with Consul 142 func (c *ConsulService) SetServiceIdentifier(serviceIdentifier string) *ConsulService { 143 c.serviceIdentifier = serviceIdentifier 144 return c 145 } 146 147 // SyncServices sync the services with consul 148 func (c *ConsulService) SyncServices(services []*structs.Service) error { 149 var mErr multierror.Error 150 taskServices := make(map[string]*consul.AgentService) 151 taskChecks := make(map[string]*consul.AgentCheckRegistration) 152 153 // Register Services and Checks that we don't know about or has changed 154 for _, service := range services { 155 srv, err := c.createService(service) 156 if err != nil { 157 mErr.Errors = append(mErr.Errors, err) 158 continue 159 } 160 trackedService, ok := c.trackedServices[srv.ID] 161 if (ok && !reflect.DeepEqual(trackedService, srv)) || !ok { 162 if err := c.registerService(srv); err != nil { 163 mErr.Errors = append(mErr.Errors, err) 164 } 165 } 166 c.trackedServices[srv.ID] = srv 167 taskServices[srv.ID] = srv 168 169 for _, chk := range service.Checks { 170 // Create a consul check registration 171 chkReg, err := c.createCheckReg(chk, srv) 172 if err != nil { 173 mErr.Errors = append(mErr.Errors, err) 174 continue 175 } 176 // creating a nomad check if we have to handle this particular check type 177 if _, ok := c.delegateChecks[chk.Type]; ok { 178 nc, err := c.createCheck(chk, chkReg.ID) 179 if err != nil { 180 mErr.Errors = append(mErr.Errors, err) 181 continue 182 } 183 cr := NewCheckRunner(nc, c.runCheck, c.logger) 184 c.checkRunners[nc.ID()] = cr 185 } 186 187 if _, ok := c.trackedChecks[chkReg.ID]; !ok { 188 if err := c.registerCheck(chkReg); err != nil { 189 mErr.Errors = append(mErr.Errors, err) 190 } 191 } 192 c.trackedChecks[chkReg.ID] = chkReg 193 taskChecks[chkReg.ID] = chkReg 194 } 195 } 196 197 // Remove services that are not present anymore 198 for _, service := range c.trackedServices { 199 if _, ok := taskServices[service.ID]; !ok { 200 if err := c.deregisterService(service.ID); err != nil { 201 mErr.Errors = append(mErr.Errors, err) 202 } 203 delete(c.trackedServices, service.ID) 204 } 205 } 206 207 // Remove the checks that are not present anymore 208 for checkID, _ := range c.trackedChecks { 209 if _, ok := taskChecks[checkID]; !ok { 210 if err := c.deregisterCheck(checkID); err != nil { 211 mErr.Errors = append(mErr.Errors, err) 212 } 213 delete(c.trackedChecks, checkID) 214 } 215 } 216 return mErr.ErrorOrNil() 217 } 218 219 // Shutdown de-registers the services and checks and shuts down periodic syncing 220 func (c *ConsulService) Shutdown() error { 221 var mErr multierror.Error 222 223 c.shutdownLock.Lock() 224 if !c.shutdown { 225 close(c.shutdownCh) 226 c.shutdown = true 227 } 228 c.shutdownLock.Unlock() 229 230 // Stop all the checks that nomad is running 231 for _, cr := range c.checkRunners { 232 cr.Stop() 233 } 234 235 // De-register all the services from consul 236 for _, service := range c.trackedServices { 237 if err := c.client.Agent().ServiceDeregister(service.ID); err != nil { 238 mErr.Errors = append(mErr.Errors, err) 239 } 240 } 241 return mErr.ErrorOrNil() 242 } 243 244 // KeepServices removes services from consul which are not present in the list 245 // of tasks passed to it 246 func (c *ConsulService) KeepServices(services map[string]struct{}) error { 247 var mErr multierror.Error 248 249 // Get the services from Consul 250 cServices, err := c.client.Agent().Services() 251 if err != nil { 252 return err 253 } 254 cServices = c.filterConsulServices(cServices) 255 256 // Remove the services from consul which are not in any of the tasks 257 for _, service := range cServices { 258 if _, validService := services[service.ID]; !validService { 259 if err := c.deregisterService(service.ID); err != nil { 260 mErr.Errors = append(mErr.Errors, err) 261 } 262 } 263 } 264 return mErr.ErrorOrNil() 265 } 266 267 // registerCheck registers a check definition with Consul 268 func (c *ConsulService) registerCheck(chkReg *consul.AgentCheckRegistration) error { 269 if cr, ok := c.checkRunners[chkReg.ID]; ok { 270 cr.Start() 271 } 272 return c.client.Agent().CheckRegister(chkReg) 273 } 274 275 // createCheckReg creates a Check that can be registered with Nomad. It also 276 // creates a Nomad check for the check types that it can handle. 277 func (c *ConsulService) createCheckReg(check *structs.ServiceCheck, service *consul.AgentService) (*consul.AgentCheckRegistration, error) { 278 chkReg := consul.AgentCheckRegistration{ 279 ID: check.Hash(service.ID), 280 Name: check.Name, 281 ServiceID: service.ID, 282 } 283 chkReg.Timeout = check.Timeout.String() 284 chkReg.Interval = check.Interval.String() 285 switch check.Type { 286 case structs.ServiceCheckHTTP: 287 if check.Protocol == "" { 288 check.Protocol = "http" 289 } 290 url := url.URL{ 291 Scheme: check.Protocol, 292 Host: fmt.Sprintf("%s:%d", service.Address, service.Port), 293 Path: check.Path, 294 } 295 chkReg.HTTP = url.String() 296 case structs.ServiceCheckTCP: 297 chkReg.TCP = fmt.Sprintf("%s:%d", service.Address, service.Port) 298 case structs.ServiceCheckScript: 299 chkReg.TTL = (check.Interval + ttlCheckBuffer).String() 300 default: 301 return nil, fmt.Errorf("check type %q not valid", check.Type) 302 } 303 return &chkReg, nil 304 } 305 306 // createService creates a Consul AgentService from a Nomad Service 307 func (c *ConsulService) createService(service *structs.Service) (*consul.AgentService, error) { 308 srv := consul.AgentService{ 309 ID: service.ID(c.serviceIdentifier), 310 Service: service.Name, 311 Tags: service.Tags, 312 } 313 host, port := c.addrFinder(service.PortLabel) 314 if host != "" { 315 srv.Address = host 316 } 317 318 if port != 0 { 319 srv.Port = port 320 } 321 322 return &srv, nil 323 } 324 325 // registerService registers a service with Consul 326 func (c *ConsulService) registerService(service *consul.AgentService) error { 327 srvReg := consul.AgentServiceRegistration{ 328 ID: service.ID, 329 Name: service.Service, 330 Tags: service.Tags, 331 Port: service.Port, 332 Address: service.Address, 333 } 334 return c.client.Agent().ServiceRegister(&srvReg) 335 } 336 337 // deregisterService de-registers a service with the given ID from consul 338 func (c *ConsulService) deregisterService(ID string) error { 339 return c.client.Agent().ServiceDeregister(ID) 340 } 341 342 // deregisterCheck de-registers a check with a given ID from Consul. 343 func (c *ConsulService) deregisterCheck(ID string) error { 344 // Deleting the nomad check 345 if cr, ok := c.checkRunners[ID]; ok { 346 cr.Stop() 347 delete(c.checkRunners, ID) 348 } 349 350 // Deleting from consul 351 return c.client.Agent().CheckDeregister(ID) 352 } 353 354 // PeriodicSync triggers periodic syncing of services and checks with Consul. 355 // This is a long lived go-routine which is stopped during shutdown 356 func (c *ConsulService) PeriodicSync() { 357 sync := time.NewTicker(syncInterval) 358 for { 359 select { 360 case <-sync.C: 361 if err := c.performSync(); err != nil { 362 if c.availble { 363 c.logger.Printf("[DEBUG] consul: error in syncing services for %q: %v", c.serviceIdentifier, err) 364 } 365 c.availble = false 366 } else { 367 c.availble = true 368 } 369 case <-c.shutdownCh: 370 sync.Stop() 371 c.logger.Printf("[INFO] consul: shutting down sync for %q", c.serviceIdentifier) 372 return 373 } 374 } 375 } 376 377 // performSync sync the services and checks we are tracking with Consul. 378 func (c *ConsulService) performSync() error { 379 var mErr multierror.Error 380 cServices, err := c.client.Agent().Services() 381 if err != nil { 382 return err 383 } 384 385 cChecks, err := c.client.Agent().Checks() 386 if err != nil { 387 return err 388 } 389 390 // Add services and checks that consul doesn't have but we do 391 for serviceID, service := range c.trackedServices { 392 if _, ok := cServices[serviceID]; !ok { 393 if err := c.registerService(service); err != nil { 394 mErr.Errors = append(mErr.Errors, err) 395 } 396 } 397 } 398 for checkID, check := range c.trackedChecks { 399 if _, ok := cChecks[checkID]; !ok { 400 if err := c.registerCheck(check); err != nil { 401 mErr.Errors = append(mErr.Errors, err) 402 } 403 } 404 } 405 406 return mErr.ErrorOrNil() 407 } 408 409 // filterConsulServices prunes out all the service whose ids are not prefixed 410 // with nomad- 411 func (c *ConsulService) filterConsulServices(srvcs map[string]*consul.AgentService) map[string]*consul.AgentService { 412 nomadServices := make(map[string]*consul.AgentService) 413 for _, srv := range srvcs { 414 if strings.HasPrefix(srv.ID, structs.NomadConsulPrefix) && 415 !strings.HasPrefix(srv.ID, structs.AgentServicePrefix) { 416 nomadServices[srv.ID] = srv 417 } 418 } 419 return nomadServices 420 } 421 422 // filterConsulChecks prunes out all the consul checks which do not have 423 // services with id prefixed with noamd- 424 func (c *ConsulService) filterConsulChecks(chks map[string]*consul.AgentCheck) map[string]*consul.AgentCheck { 425 nomadChecks := make(map[string]*consul.AgentCheck) 426 for _, chk := range chks { 427 if strings.HasPrefix(chk.ServiceID, structs.NomadConsulPrefix) { 428 nomadChecks[chk.CheckID] = chk 429 } 430 } 431 return nomadChecks 432 } 433 434 // consulPresent indicates whether the consul agent is responding 435 func (c *ConsulService) consulPresent() bool { 436 _, err := c.client.Agent().Self() 437 return err == nil 438 } 439 440 // runCheck runs a check and updates the corresponding ttl check in consul 441 func (c *ConsulService) runCheck(check Check) { 442 res := check.Run() 443 if res.Duration >= check.Timeout() { 444 c.logger.Printf("[DEBUG] consul.sync: check took time: %v, timeout: %v", res.Duration, check.Timeout()) 445 } 446 state := consul.HealthCritical 447 output := res.Output 448 switch res.ExitCode { 449 case 0: 450 state = consul.HealthPassing 451 case 1: 452 state = consul.HealthWarning 453 default: 454 state = consul.HealthCritical 455 } 456 if res.Err != nil { 457 state = consul.HealthCritical 458 output = res.Err.Error() 459 } 460 if err := c.client.Agent().UpdateTTL(check.ID(), output, state); err != nil { 461 if c.availble { 462 c.logger.Printf("[DEBUG] consul.sync: error updating ttl check for check %q: %v", check.ID(), err) 463 c.availble = false 464 } else { 465 c.availble = true 466 } 467 } 468 } 469 470 // GenerateServiceIdentifier returns a service identifier based on an allocation 471 // id and task name 472 func GenerateServiceIdentifier(allocID string, taskName string) string { 473 return fmt.Sprintf("%s-%s", taskName, allocID) 474 }