github.com/djenriquez/nomad-1@v0.8.1/client/fingerprint_manager.go (about)

     1  package client
     2  
     3  import (
     4  	"fmt"
     5  	"log"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/hashicorp/nomad/client/config"
    10  	"github.com/hashicorp/nomad/client/driver"
    11  	"github.com/hashicorp/nomad/client/fingerprint"
    12  	cstructs "github.com/hashicorp/nomad/client/structs"
    13  	"github.com/hashicorp/nomad/nomad/structs"
    14  )
    15  
    16  // FingerprintManager runs a client fingerprinters on a continuous basis, and
    17  // updates the client when the node has changed
    18  type FingerprintManager struct {
    19  	getConfig  func() *config.Config
    20  	node       *structs.Node
    21  	nodeLock   sync.Mutex
    22  	shutdownCh chan struct{}
    23  
    24  	// updateNodeAttributes is a callback to the client to update the state of its
    25  	// associated node
    26  	updateNodeAttributes func(*cstructs.FingerprintResponse) *structs.Node
    27  
    28  	// updateNodeFromDriver is a callback to the client to update the state of a
    29  	// specific driver for the node
    30  	updateNodeFromDriver func(string, *structs.DriverInfo, *structs.DriverInfo) *structs.Node
    31  	logger               *log.Logger
    32  }
    33  
    34  // NewFingerprintManager is a constructor that creates and returns an instance
    35  // of FingerprintManager
    36  func NewFingerprintManager(getConfig func() *config.Config,
    37  	node *structs.Node,
    38  	shutdownCh chan struct{},
    39  	updateNodeAttributes func(*cstructs.FingerprintResponse) *structs.Node,
    40  	updateNodeFromDriver func(string, *structs.DriverInfo, *structs.DriverInfo) *structs.Node,
    41  	logger *log.Logger) *FingerprintManager {
    42  	return &FingerprintManager{
    43  		getConfig:            getConfig,
    44  		updateNodeAttributes: updateNodeAttributes,
    45  		updateNodeFromDriver: updateNodeFromDriver,
    46  		node:                 node,
    47  		shutdownCh:           shutdownCh,
    48  		logger:               logger,
    49  	}
    50  }
    51  
    52  // setNode updates the current client node
    53  func (fm *FingerprintManager) setNode(node *structs.Node) {
    54  	fm.nodeLock.Lock()
    55  	defer fm.nodeLock.Unlock()
    56  	fm.node = node
    57  }
    58  
    59  // getNode returns the current client node
    60  func (fm *FingerprintManager) getNode() *structs.Node {
    61  	fm.nodeLock.Lock()
    62  	defer fm.nodeLock.Unlock()
    63  	return fm.node
    64  }
    65  
    66  // Run starts the process of fingerprinting the node. It does an initial pass,
    67  // identifying whitelisted and blacklisted fingerprints/drivers. Then, for
    68  // those which require periotic checking, it starts a periodic process for
    69  // each.
    70  func (fp *FingerprintManager) Run() error {
    71  	// First, set up all fingerprints
    72  	cfg := fp.getConfig()
    73  	whitelistFingerprints := cfg.ReadStringListToMap("fingerprint.whitelist")
    74  	whitelistFingerprintsEnabled := len(whitelistFingerprints) > 0
    75  	blacklistFingerprints := cfg.ReadStringListToMap("fingerprint.blacklist")
    76  
    77  	fp.logger.Printf("[DEBUG] client.fingerprint_manager: built-in fingerprints: %v", fingerprint.BuiltinFingerprints())
    78  
    79  	var availableFingerprints []string
    80  	var skippedFingerprints []string
    81  	for _, name := range fingerprint.BuiltinFingerprints() {
    82  		// Skip modules that are not in the whitelist if it is enabled.
    83  		if _, ok := whitelistFingerprints[name]; whitelistFingerprintsEnabled && !ok {
    84  			skippedFingerprints = append(skippedFingerprints, name)
    85  			continue
    86  		}
    87  		// Skip modules that are in the blacklist
    88  		if _, ok := blacklistFingerprints[name]; ok {
    89  			skippedFingerprints = append(skippedFingerprints, name)
    90  			continue
    91  		}
    92  
    93  		availableFingerprints = append(availableFingerprints, name)
    94  	}
    95  
    96  	if err := fp.setupFingerprinters(availableFingerprints); err != nil {
    97  		return err
    98  	}
    99  
   100  	if len(skippedFingerprints) != 0 {
   101  		fp.logger.Printf("[DEBUG] client.fingerprint_manager: fingerprint modules skipped due to white/blacklist: %v", skippedFingerprints)
   102  	}
   103  
   104  	// Next, set up drivers
   105  	// Build the white/blacklists of drivers.
   106  	whitelistDrivers := cfg.ReadStringListToMap("driver.whitelist")
   107  	whitelistDriversEnabled := len(whitelistDrivers) > 0
   108  	blacklistDrivers := cfg.ReadStringListToMap("driver.blacklist")
   109  
   110  	var availDrivers []string
   111  	var skippedDrivers []string
   112  
   113  	for name := range driver.BuiltinDrivers {
   114  		// Skip fingerprinting drivers that are not in the whitelist if it is
   115  		// enabled.
   116  		if _, ok := whitelistDrivers[name]; whitelistDriversEnabled && !ok {
   117  			skippedDrivers = append(skippedDrivers, name)
   118  			continue
   119  		}
   120  		// Skip fingerprinting drivers that are in the blacklist
   121  		if _, ok := blacklistDrivers[name]; ok {
   122  			skippedDrivers = append(skippedDrivers, name)
   123  			continue
   124  		}
   125  
   126  		availDrivers = append(availDrivers, name)
   127  	}
   128  
   129  	if err := fp.setupDrivers(availDrivers); err != nil {
   130  		return err
   131  	}
   132  
   133  	if len(skippedDrivers) > 0 {
   134  		fp.logger.Printf("[DEBUG] client.fingerprint_manager: drivers skipped due to white/blacklist: %v", skippedDrivers)
   135  	}
   136  	return nil
   137  }
   138  
   139  // setupFingerprints is used to fingerprint the node to see if these attributes are
   140  // supported
   141  func (fm *FingerprintManager) setupFingerprinters(fingerprints []string) error {
   142  	var appliedFingerprints []string
   143  
   144  	for _, name := range fingerprints {
   145  		f, err := fingerprint.NewFingerprint(name, fm.logger)
   146  
   147  		if err != nil {
   148  			fm.logger.Printf("[ERR] client.fingerprint_manager: fingerprinting for %v failed: %+v", name, err)
   149  			return err
   150  		}
   151  
   152  		detected, err := fm.fingerprint(name, f)
   153  		if err != nil {
   154  			return err
   155  		}
   156  
   157  		// log the fingerprinters which have been applied
   158  		if detected {
   159  			appliedFingerprints = append(appliedFingerprints, name)
   160  		}
   161  
   162  		p, period := f.Periodic()
   163  		if p {
   164  			go fm.runFingerprint(f, period, name)
   165  		}
   166  	}
   167  
   168  	fm.logger.Printf("[DEBUG] client.fingerprint_manager: detected fingerprints %v", appliedFingerprints)
   169  	return nil
   170  }
   171  
   172  // setupDrivers is used to fingerprint the node to see if these drivers are
   173  // supported
   174  func (fm *FingerprintManager) setupDrivers(drivers []string) error {
   175  	var availDrivers []string
   176  	driverCtx := driver.NewDriverContext("", "", fm.getConfig(), fm.getNode(), fm.logger, nil)
   177  	for _, name := range drivers {
   178  
   179  		d, err := driver.NewDriver(name, driverCtx)
   180  		if err != nil {
   181  			return err
   182  		}
   183  
   184  		// Pass true for whether the health check is periodic here, so that the
   185  		// fingerprinter will not set the initial health check status (this is set
   186  		// below, with an empty health status so that a node event is not
   187  		// triggered)
   188  		// Later, the periodic health checker will update this value for drivers
   189  		// where health checks are enabled.
   190  		detected, err := fm.fingerprintDriver(name, d, true)
   191  		if err != nil {
   192  			fm.logger.Printf("[DEBUG] client.fingerprint_manager: fingerprinting driver %v failed: %+v", name, err)
   193  			return err
   194  		}
   195  
   196  		// Start a periodic watcher to detect changes to a drivers health and
   197  		// attributes.
   198  		go fm.watchDriver(d, name)
   199  
   200  		// Log the fingerprinters which have been applied
   201  		if detected {
   202  			availDrivers = append(availDrivers, name)
   203  		}
   204  	}
   205  
   206  	fm.logger.Printf("[DEBUG] client.fingerprint_manager: detected drivers %v", availDrivers)
   207  	return nil
   208  }
   209  
   210  // runFingerprint runs each fingerprinter individually on an ongoing basis
   211  func (fm *FingerprintManager) runFingerprint(f fingerprint.Fingerprint, period time.Duration, name string) {
   212  	fm.logger.Printf("[DEBUG] client.fingerprint_manager: fingerprinting %s every %v", name, period)
   213  
   214  	timer := time.NewTimer(period)
   215  	defer timer.Stop()
   216  
   217  	for {
   218  		select {
   219  		case <-timer.C:
   220  			timer.Reset(period)
   221  
   222  			_, err := fm.fingerprint(name, f)
   223  			if err != nil {
   224  				fm.logger.Printf("[DEBUG] client.fingerprint_manager: periodic fingerprinting for %v failed: %+v", name, err)
   225  				continue
   226  			}
   227  
   228  		case <-fm.shutdownCh:
   229  			return
   230  		}
   231  	}
   232  }
   233  
   234  // fingerprint does an initial fingerprint of the client. If the fingerprinter
   235  // is meant to be run continuously, a process is launched to perform this
   236  // fingerprint on an ongoing basis in the background.
   237  func (fm *FingerprintManager) fingerprint(name string, f fingerprint.Fingerprint) (bool, error) {
   238  	var response cstructs.FingerprintResponse
   239  
   240  	fm.nodeLock.Lock()
   241  	request := &cstructs.FingerprintRequest{Config: fm.getConfig(), Node: fm.node}
   242  	err := f.Fingerprint(request, &response)
   243  	fm.nodeLock.Unlock()
   244  
   245  	if err != nil {
   246  		return false, err
   247  	}
   248  
   249  	if node := fm.updateNodeAttributes(&response); node != nil {
   250  		fm.setNode(node)
   251  	}
   252  
   253  	return response.Detected, nil
   254  }
   255  
   256  // watchDrivers facilitates the different periods between fingerprint and
   257  // health checking a driver
   258  func (fm *FingerprintManager) watchDriver(d driver.Driver, name string) {
   259  	var fingerprintTicker, healthTicker <-chan time.Time
   260  
   261  	// Determine whether the fingerprinter is periodic and health checking
   262  	isPeriodic, fingerprintPeriod := d.Periodic()
   263  	hc, isHealthCheck := d.(fingerprint.HealthCheck)
   264  
   265  	// Nothing to do since the state of this driver will never change
   266  	if !isPeriodic && !isHealthCheck {
   267  		return
   268  	}
   269  
   270  	// Setup the required tickers
   271  	if isPeriodic {
   272  		ticker := time.NewTicker(fingerprintPeriod)
   273  		fingerprintTicker = ticker.C
   274  		defer ticker.Stop()
   275  		fm.logger.Printf("[DEBUG] client.fingerprint_manager: fingerprinting driver %s every %v", name, fingerprintPeriod)
   276  	}
   277  
   278  	var isHealthCheckPeriodic bool
   279  	if isHealthCheck {
   280  		// Determine the interval at which to health check
   281  		req := &cstructs.HealthCheckIntervalRequest{}
   282  		var healthCheckResp cstructs.HealthCheckIntervalResponse
   283  
   284  		if err := hc.GetHealthCheckInterval(req, &healthCheckResp); err != nil {
   285  			fm.logger.Printf("[ERR] client.fingerprint_manager: error getting health check interval for driver %s: %v", name, err)
   286  		} else if healthCheckResp.Eligible {
   287  			isHealthCheckPeriodic = true
   288  			ticker := time.NewTicker(healthCheckResp.Period)
   289  			healthTicker = ticker.C
   290  			defer ticker.Stop()
   291  			fm.logger.Printf("[DEBUG] client.fingerprint_manager: health checking driver %s every %v", name, healthCheckResp.Period)
   292  		}
   293  	}
   294  
   295  	driverEverDetected := false
   296  	for {
   297  		select {
   298  		case <-fm.shutdownCh:
   299  			return
   300  		case <-fingerprintTicker:
   301  			if _, err := fm.fingerprintDriver(name, d, isHealthCheckPeriodic); err != nil {
   302  				fm.logger.Printf("[DEBUG] client.fingerprint_manager: periodic fingerprinting for driver %v failed: %+v", name, err)
   303  			}
   304  
   305  			fm.nodeLock.Lock()
   306  			driver, detected := fm.node.Drivers[name]
   307  
   308  			// Memoize the driver detected status, so that we know whether to run the
   309  			// health check or not.
   310  			if detected && driver != nil && driver.Detected {
   311  				if !driverEverDetected {
   312  					driverEverDetected = true
   313  				}
   314  			}
   315  			fm.nodeLock.Unlock()
   316  		case <-healthTicker:
   317  			if driverEverDetected {
   318  				if err := fm.runDriverHealthCheck(name, hc); err != nil {
   319  					fm.logger.Printf("[DEBUG] client.fingerprint_manager: health checking for %v failed: %v", name, err)
   320  				}
   321  			}
   322  		}
   323  	}
   324  }
   325  
   326  // fingerprintDriver is a temporary solution to move towards DriverInfo and
   327  // away from annotating a node's attributes to demonstrate support for a
   328  // particular driver. Takes the FingerprintResponse and converts it to the
   329  // proper DriverInfo update and then sets the prefix attributes as well
   330  func (fm *FingerprintManager) fingerprintDriver(name string, f fingerprint.Fingerprint, hasPeriodicHealthCheck bool) (bool, error) {
   331  	var response cstructs.FingerprintResponse
   332  
   333  	fm.nodeLock.Lock()
   334  
   335  	// Determine if the driver has been detected before.
   336  	originalNode, haveDriver := fm.node.Drivers[name]
   337  	firstDetection := !haveDriver
   338  
   339  	// Determine if the driver is healthy
   340  	var driverIsHealthy bool
   341  	if haveDriver && originalNode.Healthy {
   342  		driverIsHealthy = true
   343  	}
   344  
   345  	// Fingerprint the driver.
   346  	request := &cstructs.FingerprintRequest{Config: fm.getConfig(), Node: fm.node}
   347  	err := f.Fingerprint(request, &response)
   348  	fm.nodeLock.Unlock()
   349  
   350  	if err != nil {
   351  		return false, err
   352  	}
   353  
   354  	// Remove the health check attribute indicating the status of the driver,
   355  	// as the overall driver info object should indicate this.
   356  	delete(response.Attributes, fmt.Sprintf("driver.%s", name))
   357  
   358  	fingerprintInfo := &structs.DriverInfo{
   359  		Attributes: response.Attributes,
   360  		Detected:   response.Detected,
   361  	}
   362  
   363  	// We set the health status based on the detection state of the driver if:
   364  	// * It is the first time we are fingerprinting the driver. This gives all
   365  	// drivers an initial health.
   366  	// * If the driver becomes undetected. This gives us an immediate unhealthy
   367  	// state and description when it transistions from detected and healthy to
   368  	// undetected.
   369  	// * If the driver does not have its own health checks. Then we always
   370  	// couple the states.
   371  	var healthInfo *structs.DriverInfo
   372  	if firstDetection || !hasPeriodicHealthCheck || !response.Detected && driverIsHealthy {
   373  		state := " "
   374  		if !response.Detected {
   375  			state = " not "
   376  		}
   377  
   378  		healthInfo = &structs.DriverInfo{
   379  			Healthy:           response.Detected,
   380  			HealthDescription: fmt.Sprintf("Driver %s is%sdetected", name, state),
   381  			UpdateTime:        time.Now(),
   382  		}
   383  	}
   384  
   385  	if node := fm.updateNodeFromDriver(name, fingerprintInfo, healthInfo); node != nil {
   386  		fm.setNode(node)
   387  	}
   388  
   389  	return response.Detected, nil
   390  }
   391  
   392  // runDriverHealthCheck checks the health of the specified resource.
   393  func (fm *FingerprintManager) runDriverHealthCheck(name string, hc fingerprint.HealthCheck) error {
   394  	request := &cstructs.HealthCheckRequest{}
   395  	var response cstructs.HealthCheckResponse
   396  	if err := hc.HealthCheck(request, &response); err != nil {
   397  		return err
   398  	}
   399  
   400  	// Update the status of the node irregardless if there was an error- in the
   401  	// case of periodic health checks, an error will occur if a health check
   402  	// fails
   403  	if node := fm.updateNodeFromDriver(name, nil, response.Drivers[name]); node != nil {
   404  		fm.setNode(node)
   405  	}
   406  
   407  	return nil
   408  }