github.com/dkerwin/nomad@v0.3.3-0.20160525181927-74554135514b/client/consul/sync.go (about)

     1  package consul
     2  
     3  import (
     4  	"crypto/tls"
     5  	"fmt"
     6  	"log"
     7  	"net/http"
     8  	"net/url"
     9  	"reflect"
    10  	"strings"
    11  	"sync"
    12  	"time"
    13  
    14  	consul "github.com/hashicorp/consul/api"
    15  	"github.com/hashicorp/go-multierror"
    16  
    17  	"github.com/hashicorp/nomad/nomad/structs"
    18  )
    19  
// ConsulService allows syncing of services and checks with Consul
type ConsulService struct {
	// client is the Consul API client used for every agent call.
	client *consul.Client
	// availble records whether the most recent sync / TTL update against
	// Consul succeeded. It is consulted before logging so that an ongoing
	// outage is not logged on every attempt.
	availble bool

	serviceIdentifier string              // serviceIdentifier is a token which identifies which task/alloc the service belongs to
	delegateChecks    map[string]struct{} // delegateChecks are the checks that the Nomad client runs and reports to Consul
	// createCheck builds the Nomad-side Check for a service check; it is
	// called with the check definition and the Consul check ID.
	createCheck func(*structs.ServiceCheck, string) (Check, error)
	// addrFinder resolves a port label to the host and port of the service.
	addrFinder func(portLabel string) (string, int)

	// trackedServices holds the services handed to SyncServices, keyed by
	// service ID.
	trackedServices map[string]*consul.AgentService
	// trackedChecks holds the check registrations handed to SyncServices,
	// keyed by check ID.
	trackedChecks map[string]*consul.AgentCheckRegistration
	// checkRunners holds the runners of the checks that Nomad executes itself,
	// keyed by check ID.
	checkRunners map[string]*CheckRunner

	logger *log.Logger

	// shutdownCh is closed by Shutdown to stop PeriodicSync.
	shutdownCh chan struct{}
	// shutdown is set once shutdownCh has been closed so that it is never
	// closed twice; shutdownLock guards it.
	shutdown     bool
	shutdownLock sync.Mutex
}
    40  
// ConsulConfig is the configuration used to create a new ConsulService client
type ConsulConfig struct {
	// Addr overrides the default Consul agent address when non-empty.
	Addr string
	// Token overrides the default Consul token when non-empty.
	Token string
	// Auth enables HTTP basic auth when non-empty. It is either "username" or
	// "username:password".
	Auth string
	// EnableSSL makes the client talk https to the agent.
	EnableSSL bool
	// VerifySSL controls verification of the agent's certificate; when false
	// the TLS configuration is built with InsecureSkipVerify set.
	VerifySSL bool
	// CAFile, CertFile and KeyFile are passed to the Consul TLS configuration
	// when EnableSSL is set.
	CAFile   string
	CertFile string
	KeyFile  string
}
    52  
const (
	// syncInterval is the periodic time interval for syncing services and
	// checks with Consul
	syncInterval = 5 * time.Second

	// ttlCheckBuffer is the extra time added to a script check's interval when
	// computing the TTL registered with Consul, i.e. the time Nomad can take
	// to report the check result to Consul
	ttlCheckBuffer = 31 * time.Second
)
    61  
    62  // NewConsulService returns a new ConsulService
    63  func NewConsulService(config *ConsulConfig, logger *log.Logger) (*ConsulService, error) {
    64  	var err error
    65  	var c *consul.Client
    66  	cfg := consul.DefaultConfig()
    67  	if config.Addr != "" {
    68  		cfg.Address = config.Addr
    69  	}
    70  	if config.Token != "" {
    71  		cfg.Token = config.Token
    72  	}
    73  	if config.Auth != "" {
    74  		var username, password string
    75  		if strings.Contains(config.Auth, ":") {
    76  			split := strings.SplitN(config.Auth, ":", 2)
    77  			username = split[0]
    78  			password = split[1]
    79  		} else {
    80  			username = config.Auth
    81  		}
    82  
    83  		cfg.HttpAuth = &consul.HttpBasicAuth{
    84  			Username: username,
    85  			Password: password,
    86  		}
    87  	}
    88  	if config.EnableSSL {
    89  		cfg.Scheme = "https"
    90  		tlsCfg := consul.TLSConfig{
    91  			Address:            cfg.Address,
    92  			CAFile:             config.CAFile,
    93  			CertFile:           config.CertFile,
    94  			KeyFile:            config.KeyFile,
    95  			InsecureSkipVerify: !config.VerifySSL,
    96  		}
    97  		tlsClientCfg, err := consul.SetupTLSConfig(&tlsCfg)
    98  		if err != nil {
    99  			return nil, fmt.Errorf("error creating tls client config for consul: %v", err)
   100  		}
   101  		cfg.HttpClient.Transport = &http.Transport{
   102  			TLSClientConfig: tlsClientCfg,
   103  		}
   104  	}
   105  	if config.EnableSSL && !config.VerifySSL {
   106  		cfg.HttpClient.Transport = &http.Transport{
   107  			TLSClientConfig: &tls.Config{
   108  				InsecureSkipVerify: true,
   109  			},
   110  		}
   111  	}
   112  	if c, err = consul.NewClient(cfg); err != nil {
   113  		return nil, err
   114  	}
   115  	consulService := ConsulService{
   116  		client:          c,
   117  		logger:          logger,
   118  		trackedServices: make(map[string]*consul.AgentService),
   119  		trackedChecks:   make(map[string]*consul.AgentCheckRegistration),
   120  		checkRunners:    make(map[string]*CheckRunner),
   121  
   122  		shutdownCh: make(chan struct{}),
   123  	}
   124  	return &consulService, nil
   125  }
   126  
   127  // SetDelegatedChecks sets the checks that nomad is going to run and report the
   128  // result back to consul
   129  func (c *ConsulService) SetDelegatedChecks(delegateChecks map[string]struct{}, createCheck func(*structs.ServiceCheck, string) (Check, error)) *ConsulService {
   130  	c.delegateChecks = delegateChecks
   131  	c.createCheck = createCheck
   132  	return c
   133  }
   134  
// SetAddrFinder sets a function to find the host and port for a Service given
// its port label. The receiver is returned so calls can be chained.
func (c *ConsulService) SetAddrFinder(addrFinder func(string) (string, int)) *ConsulService {
	c.addrFinder = addrFinder
	return c
}
   140  
// SetServiceIdentifier sets the identifier of the services we are syncing with
// Consul. The identifier is used when deriving service IDs and in log
// messages. The receiver is returned so calls can be chained.
func (c *ConsulService) SetServiceIdentifier(serviceIdentifier string) *ConsulService {
	c.serviceIdentifier = serviceIdentifier
	return c
}
   146  
   147  // SyncServices sync the services with consul
   148  func (c *ConsulService) SyncServices(services []*structs.Service) error {
   149  	var mErr multierror.Error
   150  	taskServices := make(map[string]*consul.AgentService)
   151  	taskChecks := make(map[string]*consul.AgentCheckRegistration)
   152  
   153  	// Register Services and Checks that we don't know about or has changed
   154  	for _, service := range services {
   155  		srv, err := c.createService(service)
   156  		if err != nil {
   157  			mErr.Errors = append(mErr.Errors, err)
   158  			continue
   159  		}
   160  		trackedService, ok := c.trackedServices[srv.ID]
   161  		if (ok && !reflect.DeepEqual(trackedService, srv)) || !ok {
   162  			if err := c.registerService(srv); err != nil {
   163  				mErr.Errors = append(mErr.Errors, err)
   164  			}
   165  		}
   166  		c.trackedServices[srv.ID] = srv
   167  		taskServices[srv.ID] = srv
   168  
   169  		for _, chk := range service.Checks {
   170  			// Create a consul check registration
   171  			chkReg, err := c.createCheckReg(chk, srv)
   172  			if err != nil {
   173  				mErr.Errors = append(mErr.Errors, err)
   174  				continue
   175  			}
   176  			// creating a nomad check if we have to handle this particular check type
   177  			if _, ok := c.delegateChecks[chk.Type]; ok {
   178  				nc, err := c.createCheck(chk, chkReg.ID)
   179  				if err != nil {
   180  					mErr.Errors = append(mErr.Errors, err)
   181  					continue
   182  				}
   183  				cr := NewCheckRunner(nc, c.runCheck, c.logger)
   184  				c.checkRunners[nc.ID()] = cr
   185  			}
   186  
   187  			if _, ok := c.trackedChecks[chkReg.ID]; !ok {
   188  				if err := c.registerCheck(chkReg); err != nil {
   189  					mErr.Errors = append(mErr.Errors, err)
   190  				}
   191  			}
   192  			c.trackedChecks[chkReg.ID] = chkReg
   193  			taskChecks[chkReg.ID] = chkReg
   194  		}
   195  	}
   196  
   197  	// Remove services that are not present anymore
   198  	for _, service := range c.trackedServices {
   199  		if _, ok := taskServices[service.ID]; !ok {
   200  			if err := c.deregisterService(service.ID); err != nil {
   201  				mErr.Errors = append(mErr.Errors, err)
   202  			}
   203  			delete(c.trackedServices, service.ID)
   204  		}
   205  	}
   206  
   207  	// Remove the checks that are not present anymore
   208  	for checkID, _ := range c.trackedChecks {
   209  		if _, ok := taskChecks[checkID]; !ok {
   210  			if err := c.deregisterCheck(checkID); err != nil {
   211  				mErr.Errors = append(mErr.Errors, err)
   212  			}
   213  			delete(c.trackedChecks, checkID)
   214  		}
   215  	}
   216  	return mErr.ErrorOrNil()
   217  }
   218  
   219  // Shutdown de-registers the services and checks and shuts down periodic syncing
   220  func (c *ConsulService) Shutdown() error {
   221  	var mErr multierror.Error
   222  
   223  	c.shutdownLock.Lock()
   224  	if !c.shutdown {
   225  		close(c.shutdownCh)
   226  		c.shutdown = true
   227  	}
   228  	c.shutdownLock.Unlock()
   229  
   230  	// Stop all the checks that nomad is running
   231  	for _, cr := range c.checkRunners {
   232  		cr.Stop()
   233  	}
   234  
   235  	// De-register all the services from consul
   236  	for _, service := range c.trackedServices {
   237  		if err := c.client.Agent().ServiceDeregister(service.ID); err != nil {
   238  			mErr.Errors = append(mErr.Errors, err)
   239  		}
   240  	}
   241  	return mErr.ErrorOrNil()
   242  }
   243  
   244  // KeepServices removes services from consul which are not present in the list
   245  // of tasks passed to it
   246  func (c *ConsulService) KeepServices(services map[string]struct{}) error {
   247  	var mErr multierror.Error
   248  
   249  	// Get the services from Consul
   250  	cServices, err := c.client.Agent().Services()
   251  	if err != nil {
   252  		return err
   253  	}
   254  	cServices = c.filterConsulServices(cServices)
   255  
   256  	// Remove the services from consul which are not in any of the tasks
   257  	for _, service := range cServices {
   258  		if _, validService := services[service.ID]; !validService {
   259  			if err := c.deregisterService(service.ID); err != nil {
   260  				mErr.Errors = append(mErr.Errors, err)
   261  			}
   262  		}
   263  	}
   264  	return mErr.ErrorOrNil()
   265  }
   266  
   267  // registerCheck registers a check definition with Consul
   268  func (c *ConsulService) registerCheck(chkReg *consul.AgentCheckRegistration) error {
   269  	if cr, ok := c.checkRunners[chkReg.ID]; ok {
   270  		cr.Start()
   271  	}
   272  	return c.client.Agent().CheckRegister(chkReg)
   273  }
   274  
   275  // createCheckReg creates a Check that can be registered with Nomad. It also
   276  // creates a Nomad check for the check types that it can handle.
   277  func (c *ConsulService) createCheckReg(check *structs.ServiceCheck, service *consul.AgentService) (*consul.AgentCheckRegistration, error) {
   278  	chkReg := consul.AgentCheckRegistration{
   279  		ID:        check.Hash(service.ID),
   280  		Name:      check.Name,
   281  		ServiceID: service.ID,
   282  	}
   283  	chkReg.Timeout = check.Timeout.String()
   284  	chkReg.Interval = check.Interval.String()
   285  	switch check.Type {
   286  	case structs.ServiceCheckHTTP:
   287  		if check.Protocol == "" {
   288  			check.Protocol = "http"
   289  		}
   290  		url := url.URL{
   291  			Scheme: check.Protocol,
   292  			Host:   fmt.Sprintf("%s:%d", service.Address, service.Port),
   293  			Path:   check.Path,
   294  		}
   295  		chkReg.HTTP = url.String()
   296  	case structs.ServiceCheckTCP:
   297  		chkReg.TCP = fmt.Sprintf("%s:%d", service.Address, service.Port)
   298  	case structs.ServiceCheckScript:
   299  		chkReg.TTL = (check.Interval + ttlCheckBuffer).String()
   300  	default:
   301  		return nil, fmt.Errorf("check type %q not valid", check.Type)
   302  	}
   303  	return &chkReg, nil
   304  }
   305  
   306  // createService creates a Consul AgentService from a Nomad Service
   307  func (c *ConsulService) createService(service *structs.Service) (*consul.AgentService, error) {
   308  	srv := consul.AgentService{
   309  		ID:      service.ID(c.serviceIdentifier),
   310  		Service: service.Name,
   311  		Tags:    service.Tags,
   312  	}
   313  	host, port := c.addrFinder(service.PortLabel)
   314  	if host != "" {
   315  		srv.Address = host
   316  	}
   317  
   318  	if port != 0 {
   319  		srv.Port = port
   320  	}
   321  
   322  	return &srv, nil
   323  }
   324  
   325  // registerService registers a service with Consul
   326  func (c *ConsulService) registerService(service *consul.AgentService) error {
   327  	srvReg := consul.AgentServiceRegistration{
   328  		ID:      service.ID,
   329  		Name:    service.Service,
   330  		Tags:    service.Tags,
   331  		Port:    service.Port,
   332  		Address: service.Address,
   333  	}
   334  	return c.client.Agent().ServiceRegister(&srvReg)
   335  }
   336  
// deregisterService de-registers a service with the given ID from consul and
// returns the error reported by the Consul agent call, if any.
func (c *ConsulService) deregisterService(ID string) error {
	return c.client.Agent().ServiceDeregister(ID)
}
   341  
   342  // deregisterCheck de-registers a check with a given ID from Consul.
   343  func (c *ConsulService) deregisterCheck(ID string) error {
   344  	// Deleting the nomad check
   345  	if cr, ok := c.checkRunners[ID]; ok {
   346  		cr.Stop()
   347  		delete(c.checkRunners, ID)
   348  	}
   349  
   350  	// Deleting from consul
   351  	return c.client.Agent().CheckDeregister(ID)
   352  }
   353  
   354  // PeriodicSync triggers periodic syncing of services and checks with Consul.
   355  // This is a long lived go-routine which is stopped during shutdown
   356  func (c *ConsulService) PeriodicSync() {
   357  	sync := time.NewTicker(syncInterval)
   358  	for {
   359  		select {
   360  		case <-sync.C:
   361  			if err := c.performSync(); err != nil {
   362  				if c.availble {
   363  					c.logger.Printf("[DEBUG] consul: error in syncing services for %q: %v", c.serviceIdentifier, err)
   364  				}
   365  				c.availble = false
   366  			} else {
   367  				c.availble = true
   368  			}
   369  		case <-c.shutdownCh:
   370  			sync.Stop()
   371  			c.logger.Printf("[INFO] consul: shutting down sync for %q", c.serviceIdentifier)
   372  			return
   373  		}
   374  	}
   375  }
   376  
   377  // performSync sync the services and checks we are tracking with Consul.
   378  func (c *ConsulService) performSync() error {
   379  	var mErr multierror.Error
   380  	cServices, err := c.client.Agent().Services()
   381  	if err != nil {
   382  		return err
   383  	}
   384  
   385  	cChecks, err := c.client.Agent().Checks()
   386  	if err != nil {
   387  		return err
   388  	}
   389  
   390  	// Add services and checks that consul doesn't have but we do
   391  	for serviceID, service := range c.trackedServices {
   392  		if _, ok := cServices[serviceID]; !ok {
   393  			if err := c.registerService(service); err != nil {
   394  				mErr.Errors = append(mErr.Errors, err)
   395  			}
   396  		}
   397  	}
   398  	for checkID, check := range c.trackedChecks {
   399  		if _, ok := cChecks[checkID]; !ok {
   400  			if err := c.registerCheck(check); err != nil {
   401  				mErr.Errors = append(mErr.Errors, err)
   402  			}
   403  		}
   404  	}
   405  
   406  	return mErr.ErrorOrNil()
   407  }
   408  
   409  // filterConsulServices prunes out all the service whose ids are not prefixed
   410  // with nomad-
   411  func (c *ConsulService) filterConsulServices(srvcs map[string]*consul.AgentService) map[string]*consul.AgentService {
   412  	nomadServices := make(map[string]*consul.AgentService)
   413  	for _, srv := range srvcs {
   414  		if strings.HasPrefix(srv.ID, structs.NomadConsulPrefix) &&
   415  			!strings.HasPrefix(srv.ID, structs.AgentServicePrefix) {
   416  			nomadServices[srv.ID] = srv
   417  		}
   418  	}
   419  	return nomadServices
   420  }
   421  
   422  // filterConsulChecks prunes out all the consul checks which do not have
   423  // services with id prefixed with noamd-
   424  func (c *ConsulService) filterConsulChecks(chks map[string]*consul.AgentCheck) map[string]*consul.AgentCheck {
   425  	nomadChecks := make(map[string]*consul.AgentCheck)
   426  	for _, chk := range chks {
   427  		if strings.HasPrefix(chk.ServiceID, structs.NomadConsulPrefix) {
   428  			nomadChecks[chk.CheckID] = chk
   429  		}
   430  	}
   431  	return nomadChecks
   432  }
   433  
   434  // consulPresent indicates whether the consul agent is responding
   435  func (c *ConsulService) consulPresent() bool {
   436  	_, err := c.client.Agent().Self()
   437  	return err == nil
   438  }
   439  
   440  // runCheck runs a check and updates the corresponding ttl check in consul
   441  func (c *ConsulService) runCheck(check Check) {
   442  	res := check.Run()
   443  	if res.Duration >= check.Timeout() {
   444  		c.logger.Printf("[DEBUG] consul.sync: check took time: %v, timeout: %v", res.Duration, check.Timeout())
   445  	}
   446  	state := consul.HealthCritical
   447  	output := res.Output
   448  	switch res.ExitCode {
   449  	case 0:
   450  		state = consul.HealthPassing
   451  	case 1:
   452  		state = consul.HealthWarning
   453  	default:
   454  		state = consul.HealthCritical
   455  	}
   456  	if res.Err != nil {
   457  		state = consul.HealthCritical
   458  		output = res.Err.Error()
   459  	}
   460  	if err := c.client.Agent().UpdateTTL(check.ID(), output, state); err != nil {
   461  		if c.availble {
   462  			c.logger.Printf("[DEBUG] consul.sync: error updating ttl check for check %q: %v", check.ID(), err)
   463  			c.availble = false
   464  		} else {
   465  			c.availble = true
   466  		}
   467  	}
   468  }
   469  
   470  // GenerateServiceIdentifier returns a service identifier based on an allocation
   471  // id and task name
   472  func GenerateServiceIdentifier(allocID string, taskName string) string {
   473  	return fmt.Sprintf("%s-%s", taskName, allocID)
   474  }