github.com/hernad/nomad@v1.6.112/nomad/volumewatcher/volume_watcher.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package volumewatcher

import (
	"context"
	"sync"
	"time"

	log "github.com/hashicorp/go-hclog"
	memdb "github.com/hashicorp/go-memdb"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/hernad/nomad/helper"
	"github.com/hernad/nomad/nomad/state"
	"github.com/hernad/nomad/nomad/structs"
)

// volumeWatcher is used to watch a single volume and release its claims
// as the allocations holding them become terminal.
type volumeWatcher struct {
	// v is the volume being watched
	v *structs.CSIVolume

	// state is the state store watched for changes
	state *state.StateStore

	// rpc is the server interface for CSI client RPCs
	rpc CSIVolumeRPC

	// leaderAcl is the ACL needed to send RPCs
	leaderAcl string

	logger      log.Logger
	shutdownCtx context.Context // parent context
	ctx         context.Context // own context
	exitFn      context.CancelFunc
	deleteFn    func()

	// quiescentTimeout is the time we wait until the volume has "settled"
	// before stopping the child watcher goroutines
	quiescentTimeout time.Duration

	// updateCh is triggered when there is an updated volume
	updateCh chan *structs.CSIVolume

	wLock   sync.RWMutex
	running bool
}

// newVolumeWatcher returns a volume watcher that is used to watch a
// single volume
func newVolumeWatcher(parent *Watcher, vol *structs.CSIVolume) *volumeWatcher {

	w := &volumeWatcher{
		updateCh:         make(chan *structs.CSIVolume, 1),
		v:                vol,
		state:            parent.state,
		rpc:              parent.rpc,
		leaderAcl:        parent.leaderAcl,
		logger:           parent.logger.With("volume_id", vol.ID, "namespace", vol.Namespace),
		shutdownCtx:      parent.ctx,
		deleteFn:         func() { parent.remove(vol.ID + vol.Namespace) },
		quiescentTimeout: parent.quiescentTimeout,
	}

	// Start the long lived watcher that scans for allocation updates
	w.Start()
	return w
}
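
// A hypothetical call site in the parent Watcher (sketch only; the real
// caller lives in watcher.go and may differ):
//
//	w, ok := watchers[vol.ID+vol.Namespace]
//	if !ok {
//		w = newVolumeWatcher(parent, vol)
//		watchers[vol.ID+vol.Namespace] = w
//	}
//	w.Notify(vol)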

// Notify signals an update to the tracked volume.
func (vw *volumeWatcher) Notify(v *structs.CSIVolume) {
	if !vw.isRunning() {
		vw.Start()
	}
	select {
	case vw.updateCh <- v:
	case <-vw.shutdownCtx.Done(): // prevent deadlock if we stopped
	}
}
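
// Note that updateCh has a buffer of one, so back-to-back notifications
// behave roughly like this (illustrative only):
//
//	w.Notify(vol) // buffered: returns immediately, wakes watch()
//	w.Notify(vol) // buffer full while watch() is busy: blocks until the
//	              // update is consumed or shutdownCtx is done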

func (vw *volumeWatcher) Start() {
	vw.logger.Trace("starting watcher")
	vw.wLock.Lock()
	defer vw.wLock.Unlock()
	vw.running = true
	go vw.watch()
}

func (vw *volumeWatcher) Stop() {
	vw.logger.Trace("no more claims")
	vw.wLock.Lock()
	defer vw.wLock.Unlock()
	vw.running = false
}

func (vw *volumeWatcher) isRunning() bool {
	vw.wLock.RLock()
	defer vw.wLock.RUnlock()
	select {
	case <-vw.shutdownCtx.Done():
		return false
	default:
		return vw.running
	}
}

// watch is the long-running function that watches for changes to a volume.
// Each pass steps the volume's claims through the various states of reaping
// until the volume has no more claims eligible to be reaped.
func (vw *volumeWatcher) watch() {
	defer vw.deleteFn()
	defer vw.Stop()

	timer, stop := helper.NewSafeTimer(vw.quiescentTimeout)
	defer stop()

	for {
		select {
		// TODO(tgross): currently server->client RPCs have no cancellation
		// context, so we can't stop the long-running RPCs gracefully
		case <-vw.shutdownCtx.Done():
			return
		case vol := <-vw.updateCh:
			vol = vw.getVolume(vol)
			if vol == nil {
				return
			}
			vw.volumeReap(vol)
			timer.Reset(vw.quiescentTimeout)
		case <-timer.C:
			// Wait until the volume has "settled" before stopping this
			// goroutine so that we can handle the burst of updates around
			// freeing claims without having to spin it back up
			return
		}
	}
}
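
// An illustrative timeline of the loop above (times are made up):
//
//	t0              Notify arrives -> volumeReap, timer reset
//	t0 + 1s         Notify arrives -> volumeReap, timer reset
//	t0 + 1s + qt    quiescentTimeout elapses with no further updates ->
//	                goroutine exits, deleteFn removes the watcher
//
// A later Notify transparently restarts the watcher via Start().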

// getVolume returns the tracked volume, fully populated with the current
// state
func (vw *volumeWatcher) getVolume(vol *structs.CSIVolume) *structs.CSIVolume {
	vw.wLock.RLock()
	defer vw.wLock.RUnlock()

	var err error
	ws := memdb.NewWatchSet()

	vol, err = vw.state.CSIVolumeByID(ws, vol.Namespace, vol.ID)
	if err != nil {
		vw.logger.Error("could not query for volume", "error", err)
		return nil
	}
	if vol == nil {
		return nil
	}

	vol, err = vw.state.CSIVolumeDenormalize(ws, vol)
	if err != nil {
		vw.logger.Error("could not query allocs for volume", "error", err)
		return nil
	}
	vw.v = vol
	return vol
}

// volumeReap collects errors for logging but doesn't return them
// to the main loop.
func (vw *volumeWatcher) volumeReap(vol *structs.CSIVolume) {
	vw.logger.Trace("releasing unused volume claims")
	err := vw.volumeReapImpl(vol)
	if err != nil {
		vw.logger.Error("error releasing volume claims", "error", err)
	}
}

func (vw *volumeWatcher) isUnclaimed(vol *structs.CSIVolume) bool {
	return len(vol.ReadClaims) == 0 && len(vol.WriteClaims) == 0 && len(vol.PastClaims) == 0
}

// volumeReapImpl unpublishes all the volume's PastClaims. PastClaims
// will be populated from nil or terminal allocs when we call
// CSIVolumeDenormalize(), so this assumes the caller has already done so
func (vw *volumeWatcher) volumeReapImpl(vol *structs.CSIVolume) error {
	var result *multierror.Error
	for _, claim := range vol.PastClaims {
		err := vw.unpublish(vol, claim)
		if err != nil {
			result = multierror.Append(result, err)
		}
	}
	return result.ErrorOrNil()
}
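
// An illustrative view of the claim lifecycle as this package drives it
// (the detach steps themselves happen inside the Unpublish RPC, so the
// middle stage is an assumption about that RPC's behavior):
//
//	alloc nil/terminal -> PastClaims entry (CSIVolumeClaimStateTaken)
//	                   -> Unpublish RPC (node and controller detach)
//	                   -> CSIVolumeClaimStateReadyToFree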

// collectPastClaims records claims for nil or terminal allocations in the
// volume's PastClaims map so that volumeReapImpl can unpublish them.
func (vw *volumeWatcher) collectPastClaims(vol *structs.CSIVolume) *structs.CSIVolume {

	collect := func(allocs map[string]*structs.Allocation,
		claims map[string]*structs.CSIVolumeClaim) {

		for allocID, alloc := range allocs {
			if alloc == nil {
				_, exists := vol.PastClaims[allocID]
				if !exists {
					vol.PastClaims[allocID] = &structs.CSIVolumeClaim{
						AllocationID: allocID,
						State:        structs.CSIVolumeClaimStateReadyToFree,
					}
				}
			} else if alloc.Terminated() {
				// don't overwrite the PastClaim if we've seen it before,
				// so that we can track state between subsequent calls
				_, exists := vol.PastClaims[allocID]
				if !exists {
					claim, ok := claims[allocID]
					if !ok {
						claim = &structs.CSIVolumeClaim{
							AllocationID: allocID,
							NodeID:       alloc.NodeID,
						}
					}
					claim.State = structs.CSIVolumeClaimStateTaken
					vol.PastClaims[allocID] = claim
				}
			}
		}
	}

	collect(vol.ReadAllocs, vol.ReadClaims)
	collect(vol.WriteAllocs, vol.WriteClaims)
	return vol
}
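
// For example, given a hypothetical write claim whose allocation has just
// failed:
//
//	vol.WriteAllocs = map[string]*structs.Allocation{"a1": failedAlloc}
//	vol.WriteClaims = map[string]*structs.CSIVolumeClaim{
//		"a1": {AllocationID: "a1", NodeID: failedAlloc.NodeID},
//	}
//
// collectPastClaims copies that claim into vol.PastClaims with State set
// to CSIVolumeClaimStateTaken, making it eligible for volumeReapImpl.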

// unpublish issues an Unpublish RPC for a single claim and, on success,
// marks the claim as ready to free.
func (vw *volumeWatcher) unpublish(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {
	vw.logger.Trace("unpublishing volume", "alloc", claim.AllocationID)
	req := &structs.CSIVolumeUnpublishRequest{
		VolumeID: vol.ID,
		Claim:    claim,
		WriteRequest: structs.WriteRequest{
			Namespace: vol.Namespace,
			Region:    vw.state.Config().Region,
			AuthToken: vw.leaderAcl,
		},
	}
	err := vw.rpc.Unpublish(req, &structs.CSIVolumeUnpublishResponse{})
	if err != nil {
		return err
	}
	claim.State = structs.CSIVolumeClaimStateReadyToFree
	return nil
}
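
// A minimal sketch of one reap pass as watch() drives it (illustrative
// only; error handling elided):
//
//	vol := vw.getVolume(updated) // re-read and denormalize from state
//	vw.volumeReap(vol)           // Unpublish each entry in vol.PastClaims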