github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/nomad/volumewatcher/volumes_watcher.go (about)

     1  package volumewatcher
     2  
     3  import (
     4  	"context"
     5  	"sync"
     6  	"time"
     7  
     8  	log "github.com/hashicorp/go-hclog"
     9  	memdb "github.com/hashicorp/go-memdb"
    10  	"github.com/hashicorp/nomad/nomad/state"
    11  	"github.com/hashicorp/nomad/nomad/structs"
    12  )
    13  
    14  // Watcher is used to watch volumes and their allocations created
    15  // by the scheduler and trigger the scheduler when allocation health
    16  // transitions.
    17  type Watcher struct {
    18  	enabled bool
    19  	logger  log.Logger
    20  
    21  	// rpc contains the set of Server methods that can be used by
    22  	// the volumes watcher for RPC
    23  	rpc CSIVolumeRPC
    24  
    25  	// the ACL needed to send RPCs
    26  	leaderAcl string
    27  
    28  	// state is the state that is watched for state changes.
    29  	state *state.StateStore
    30  
    31  	// watchers is the set of active watchers, one per volume
    32  	watchers map[string]*volumeWatcher
    33  
    34  	// ctx and exitFn are used to cancel the watcher
    35  	ctx    context.Context
    36  	exitFn context.CancelFunc
    37  
    38  	// quiescentTimeout is the time we wait until the volume has "settled"
    39  	// before stopping the child watcher goroutines
    40  	quiescentTimeout time.Duration
    41  
    42  	wlock sync.RWMutex
    43  }
    44  
    45  var defaultQuiescentTimeout = time.Minute * 5
    46  
    47  // NewVolumesWatcher returns a volumes watcher that is used to watch
    48  // volumes and trigger the scheduler as needed.
    49  func NewVolumesWatcher(logger log.Logger, rpc CSIVolumeRPC, leaderAcl string) *Watcher {
    50  
    51  	// the leader step-down calls SetEnabled(false) which is what
    52  	// cancels this context, rather than passing in its own shutdown
    53  	// context
    54  	ctx, exitFn := context.WithCancel(context.Background())
    55  
    56  	return &Watcher{
    57  		rpc:              rpc,
    58  		logger:           logger.Named("volumes_watcher"),
    59  		ctx:              ctx,
    60  		exitFn:           exitFn,
    61  		leaderAcl:        leaderAcl,
    62  		quiescentTimeout: defaultQuiescentTimeout,
    63  	}
    64  }
    65  
    66  // SetEnabled is used to control if the watcher is enabled. The
    67  // watcher should only be enabled on the active leader. When being
    68  // enabled the state and leader's ACL is passed in as it is no longer
    69  // valid once a leader election has taken place.
    70  func (w *Watcher) SetEnabled(enabled bool, state *state.StateStore, leaderAcl string) {
    71  	w.wlock.Lock()
    72  	defer w.wlock.Unlock()
    73  
    74  	wasEnabled := w.enabled
    75  	w.enabled = enabled
    76  	w.leaderAcl = leaderAcl
    77  
    78  	if state != nil {
    79  		w.state = state
    80  	}
    81  
    82  	// Flush the state to create the necessary objects
    83  	w.flush(enabled)
    84  
    85  	// If we are starting now, launch the watch daemon
    86  	if enabled && !wasEnabled {
    87  		go w.watchVolumes(w.ctx)
    88  	}
    89  }
    90  
    91  // flush is used to clear the state of the watcher
    92  func (w *Watcher) flush(enabled bool) {
    93  	// Stop all the watchers and clear it
    94  	for _, watcher := range w.watchers {
    95  		watcher.Stop()
    96  	}
    97  
    98  	// Kill everything associated with the watcher
    99  	if w.exitFn != nil {
   100  		w.exitFn()
   101  	}
   102  
   103  	w.watchers = make(map[string]*volumeWatcher, 32)
   104  	w.ctx, w.exitFn = context.WithCancel(context.Background())
   105  }
   106  
   107  // watchVolumes is the long lived go-routine that watches for volumes to
   108  // add and remove watchers on.
   109  func (w *Watcher) watchVolumes(ctx context.Context) {
   110  	vIndex := uint64(1)
   111  	for {
   112  		volumes, idx, err := w.getVolumes(ctx, vIndex)
   113  		if err != nil {
   114  			if err == context.Canceled {
   115  				return
   116  			}
   117  			w.logger.Error("failed to retrieve volumes", "error", err)
   118  		}
   119  
   120  		vIndex = idx // last-seen index
   121  		for _, v := range volumes {
   122  			if err := w.add(v); err != nil {
   123  				w.logger.Error("failed to track volume", "volume_id", v.ID, "error", err)
   124  			}
   125  
   126  		}
   127  	}
   128  }
   129  
   130  // getVolumes retrieves all volumes blocking at the given index.
   131  func (w *Watcher) getVolumes(ctx context.Context, minIndex uint64) ([]*structs.CSIVolume, uint64, error) {
   132  	resp, index, err := w.state.BlockingQuery(w.getVolumesImpl, minIndex, ctx)
   133  	if err != nil {
   134  		return nil, 0, err
   135  	}
   136  
   137  	return resp.([]*structs.CSIVolume), index, nil
   138  }
   139  
   140  // getVolumesImpl retrieves all volumes from the passed state store.
   141  func (w *Watcher) getVolumesImpl(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) {
   142  
   143  	iter, err := state.CSIVolumes(ws)
   144  	if err != nil {
   145  		return nil, 0, err
   146  	}
   147  
   148  	var volumes []*structs.CSIVolume
   149  	for {
   150  		raw := iter.Next()
   151  		if raw == nil {
   152  			break
   153  		}
   154  		volume := raw.(*structs.CSIVolume)
   155  		volumes = append(volumes, volume)
   156  	}
   157  
   158  	// Use the last index that affected the volume table
   159  	index, err := state.Index("csi_volumes")
   160  	if err != nil {
   161  		return nil, 0, err
   162  	}
   163  
   164  	return volumes, index, nil
   165  }
   166  
   167  // add adds a volume to the watch list
   168  func (w *Watcher) add(v *structs.CSIVolume) error {
   169  	w.wlock.Lock()
   170  	defer w.wlock.Unlock()
   171  	_, err := w.addLocked(v)
   172  	return err
   173  }
   174  
   175  // addLocked adds a volume to the watch list and should only be called when
   176  // locked. Creating the volumeWatcher starts a go routine to .watch() it
   177  func (w *Watcher) addLocked(v *structs.CSIVolume) (*volumeWatcher, error) {
   178  	// Not enabled so no-op
   179  	if !w.enabled {
   180  		return nil, nil
   181  	}
   182  
   183  	// Already watched so trigger an update for the volume
   184  	if watcher, ok := w.watchers[v.ID+v.Namespace]; ok {
   185  		watcher.Notify(v)
   186  		return nil, nil
   187  	}
   188  
   189  	watcher := newVolumeWatcher(w, v)
   190  	w.watchers[v.ID+v.Namespace] = watcher
   191  
   192  	// Sending the first volume update here before we return ensures we've hit
   193  	// the run loop in the goroutine before freeing the lock. This prevents a
   194  	// race between shutting down the watcher and the blocking query.
   195  	//
   196  	// It also ensures that we don't drop events that happened during leadership
   197  	// transitions and didn't get completed by the prior leader
   198  	watcher.updateCh <- v
   199  	return watcher, nil
   200  }
   201  
   202  // removes a volume from the watch list
   203  func (w *Watcher) remove(volID string) {
   204  	w.wlock.Lock()
   205  	defer w.wlock.Unlock()
   206  	delete(w.watchers, volID)
   207  }