github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/nomad/volumewatcher/volume_watcher.go

package volumewatcher

import (
	"context"
	"sync"
	"time"

	log "github.com/hashicorp/go-hclog"
	memdb "github.com/hashicorp/go-memdb"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
)

// volumeWatcher is used to watch a single volume and trigger the
// scheduler when allocation health transitions.
type volumeWatcher struct {
	// v is the volume being watched
	v *structs.CSIVolume

	// state is the state that is watched for state changes.
	state *state.StateStore

	// server interface for CSI client RPCs
	rpc CSIVolumeRPC

	// the ACL needed to send RPCs
	leaderAcl string

	logger      log.Logger
	shutdownCtx context.Context // parent context
	ctx         context.Context // own context
	exitFn      context.CancelFunc
	deleteFn    func()

	// quiescentTimeout is the time we wait until the volume has "settled"
	// before stopping the child watcher goroutines
	quiescentTimeout time.Duration

	// updateCh is triggered when there is an updated volume
	updateCh chan *structs.CSIVolume

	wLock   sync.RWMutex
	running bool
}

// newVolumeWatcher returns a volume watcher that is used to watch
// volumes
func newVolumeWatcher(parent *Watcher, vol *structs.CSIVolume) *volumeWatcher {

	w := &volumeWatcher{
		updateCh:         make(chan *structs.CSIVolume, 1),
		v:                vol,
		state:            parent.state,
		rpc:              parent.rpc,
		leaderAcl:        parent.leaderAcl,
		logger:           parent.logger.With("volume_id", vol.ID, "namespace", vol.Namespace),
		shutdownCtx:      parent.ctx,
		deleteFn:         func() { parent.remove(vol.ID + vol.Namespace) },
		quiescentTimeout: parent.quiescentTimeout,
	}

	// Start the long-lived watcher that scans for allocation updates
	w.Start()
	return w
}

// Notify signals an update to the tracked volume.
func (vw *volumeWatcher) Notify(v *structs.CSIVolume) {
	if !vw.isRunning() {
		vw.Start()
	}
	select {
	case vw.updateCh <- v:
	case <-vw.shutdownCtx.Done(): // prevent deadlock if we stopped
	}
}

func (vw *volumeWatcher) Start() {
	vw.logger.Trace("starting watcher")
	vw.wLock.Lock()
	defer vw.wLock.Unlock()
	vw.running = true
	go vw.watch()
}

func (vw *volumeWatcher) Stop() {
	vw.logger.Trace("no more claims")
	vw.wLock.Lock()
	defer vw.wLock.Unlock()
	vw.running = false
}

func (vw *volumeWatcher) isRunning() bool {
	vw.wLock.RLock()
	defer vw.wLock.RUnlock()
	select {
	case <-vw.shutdownCtx.Done():
		return false
	default:
		return vw.running
	}
}
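
// The sketch below is not part of the original file. It illustrates how a
// caller holding a set of per-volume watchers might fan out a batch of
// updated volumes: Notify restarts the watch goroutine if the quiescent
// timeout has already stopped it, and the buffered updateCh plus the
// shutdownCtx case keep the send from blocking during shutdown. The
// notifyAllSketch name and the watchers map are assumptions for
// illustration; only Notify and the ID+Namespace key used by deleteFn
// above come from this file.
func notifyAllSketch(watchers map[string]*volumeWatcher, vols []*structs.CSIVolume) {
	for _, vol := range vols {
		// the parent indexes watchers by volume ID + namespace (see deleteFn)
		if w, ok := watchers[vol.ID+vol.Namespace]; ok {
			w.Notify(vol)
		}
	}
}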

// watch is the long-running function that watches for changes to a volume.
// Each pass steps the volume's claims through the various states of reaping
// until the volume has no more claims eligible to be reaped.
func (vw *volumeWatcher) watch() {
	defer vw.deleteFn()
	defer vw.Stop()

	timer, stop := helper.NewSafeTimer(vw.quiescentTimeout)
	defer stop()

	for {
		select {
		// TODO(tgross): currently server->client RPCs have no cancellation
		// context, so we can't stop the long-running RPCs gracefully
		case <-vw.shutdownCtx.Done():
			return
		case vol := <-vw.updateCh:
			vol = vw.getVolume(vol)
			if vol == nil {
				return
			}
			vw.volumeReap(vol)
			timer.Reset(vw.quiescentTimeout)
		case <-timer.C:
			// Wait until the volume has "settled" before stopping this
			// goroutine so that we can handle the burst of updates around
			// freeing claims without having to spin it back up
			return
		}
	}
}

// getVolume returns the tracked volume, fully populated with the current
// state
func (vw *volumeWatcher) getVolume(vol *structs.CSIVolume) *structs.CSIVolume {
	vw.wLock.RLock()
	defer vw.wLock.RUnlock()

	var err error
	ws := memdb.NewWatchSet()

	vol, err = vw.state.CSIVolumeByID(ws, vol.Namespace, vol.ID)
	if err != nil {
		vw.logger.Error("could not query for volume", "error", err)
		return nil
	}
	if vol == nil {
		return nil
	}

	vol, err = vw.state.CSIVolumeDenormalize(ws, vol)
	if err != nil {
		vw.logger.Error("could not query allocs for volume", "error", err)
		return nil
	}
	vw.v = vol
	return vol
}

// volumeReap collects errors for logging but doesn't return them
// to the main loop.
func (vw *volumeWatcher) volumeReap(vol *structs.CSIVolume) {
	vw.logger.Trace("releasing unused volume claims")
	err := vw.volumeReapImpl(vol)
	if err != nil {
		vw.logger.Error("error releasing volume claims", "error", err)
	}
}

func (vw *volumeWatcher) isUnclaimed(vol *structs.CSIVolume) bool {
	return len(vol.ReadClaims) == 0 && len(vol.WriteClaims) == 0 && len(vol.PastClaims) == 0
}
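
// The sketch below is not part of the original file. It shows how a single
// reap pass could be driven synchronously, for example from a test: refresh
// the volume from the state store, release any PastClaims, and report
// whether the volume has settled. The reapOnceSketch name is an assumption;
// the body only composes getVolume, volumeReap, and isUnclaimed as defined
// in this file.
func reapOnceSketch(vw *volumeWatcher, vol *structs.CSIVolume) bool {
	vol = vw.getVolume(vol)
	if vol == nil {
		// the volume has been deleted from state; nothing left to reap
		return true
	}
	vw.volumeReap(vol)
	return vw.isUnclaimed(vol)
}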

// volumeReapImpl unpublishes all the volume's PastClaims. PastClaims
// will be populated from nil or terminal allocs when we call
// CSIVolumeDenormalize(), so this assumes we've done so in the caller
func (vw *volumeWatcher) volumeReapImpl(vol *structs.CSIVolume) error {
	var result *multierror.Error
	for _, claim := range vol.PastClaims {
		err := vw.unpublish(vol, claim)
		if err != nil {
			result = multierror.Append(result, err)
		}
	}
	return result.ErrorOrNil()
}

func (vw *volumeWatcher) collectPastClaims(vol *structs.CSIVolume) *structs.CSIVolume {

	collect := func(allocs map[string]*structs.Allocation,
		claims map[string]*structs.CSIVolumeClaim) {

		for allocID, alloc := range allocs {
			if alloc == nil {
				_, exists := vol.PastClaims[allocID]
				if !exists {
					vol.PastClaims[allocID] = &structs.CSIVolumeClaim{
						AllocationID: allocID,
						State:        structs.CSIVolumeClaimStateReadyToFree,
					}
				}
			} else if alloc.Terminated() {
				// don't overwrite the PastClaim if we've seen it before,
				// so that we can track state between subsequent calls
				_, exists := vol.PastClaims[allocID]
				if !exists {
					claim, ok := claims[allocID]
					if !ok {
						claim = &structs.CSIVolumeClaim{
							AllocationID: allocID,
							NodeID:       alloc.NodeID,
						}
					}
					claim.State = structs.CSIVolumeClaimStateTaken
					vol.PastClaims[allocID] = claim
				}
			}
		}
	}

	collect(vol.ReadAllocs, vol.ReadClaims)
	collect(vol.WriteAllocs, vol.WriteClaims)
	return vol
}

func (vw *volumeWatcher) unpublish(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {
	vw.logger.Trace("unpublishing volume", "alloc", claim.AllocationID)
	req := &structs.CSIVolumeUnpublishRequest{
		VolumeID: vol.ID,
		Claim:    claim,
		WriteRequest: structs.WriteRequest{
			Namespace: vol.Namespace,
			Region:    vw.state.Config().Region,
			AuthToken: vw.leaderAcl,
		},
	}
	err := vw.rpc.Unpublish(req, &structs.CSIVolumeUnpublishResponse{})
	if err != nil {
		return err
	}
	claim.State = structs.CSIVolumeClaimStateReadyToFree
	return nil
}
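
// The sketch below is not part of the original file. It illustrates how
// collectPastClaims classifies allocations: an allocation that has been
// garbage-collected (nil) yields a PastClaim that is immediately ready to
// free, while a terminal allocation yields a PastClaim in the "taken" state
// so that unpublish can release it. The function name, the alloc IDs, and
// the use of AllocClientStatusComplete to stand in for a terminal
// allocation are assumptions for illustration.
func collectPastClaimsSketch(vw *volumeWatcher) {
	vol := &structs.CSIVolume{
		ReadAllocs: map[string]*structs.Allocation{
			// alloc no longer in state: claim can be freed right away
			"gc-ed-alloc": nil,
		},
		WriteAllocs: map[string]*structs.Allocation{
			// alloc still in state but terminal: claim must be unpublished
			"stopped-alloc": &structs.Allocation{ClientStatus: structs.AllocClientStatusComplete},
		},
		ReadClaims:  map[string]*structs.CSIVolumeClaim{},
		WriteClaims: map[string]*structs.CSIVolumeClaim{},
		PastClaims:  map[string]*structs.CSIVolumeClaim{},
	}

	vol = vw.collectPastClaims(vol)

	// Expected states after the call:
	//   vol.PastClaims["gc-ed-alloc"].State   == structs.CSIVolumeClaimStateReadyToFree
	//   vol.PastClaims["stopped-alloc"].State == structs.CSIVolumeClaimStateTaken
	_ = vol
}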