github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/nomad/volumewatcher/volume_watcher.go

package volumewatcher

import (
	"context"
	"sync"

	log "github.com/hashicorp/go-hclog"
	memdb "github.com/hashicorp/go-memdb"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
)

// volumeWatcher is used to watch a single volume and trigger the
// scheduler when allocation health transitions.
type volumeWatcher struct {
	// v is the volume being watched
	v *structs.CSIVolume

	// state is the state store that is watched for changes
	state *state.StateStore

	// rpc is the server interface for CSI client RPCs
	rpc CSIVolumeRPC

	// leaderAcl is the ACL token needed to send RPCs
	leaderAcl string

	logger      log.Logger
	shutdownCtx context.Context // parent context
	ctx         context.Context // own context
	exitFn      context.CancelFunc

	// updateCh is triggered when there is an updated volume
	updateCh chan *structs.CSIVolume

	wLock   sync.RWMutex
	running bool
}

// newVolumeWatcher returns a volume watcher that is used to watch a
// single volume.
func newVolumeWatcher(parent *Watcher, vol *structs.CSIVolume) *volumeWatcher {

	w := &volumeWatcher{
		updateCh:    make(chan *structs.CSIVolume, 1),
		v:           vol,
		state:       parent.state,
		rpc:         parent.rpc,
		leaderAcl:   parent.leaderAcl,
		logger:      parent.logger.With("volume_id", vol.ID, "namespace", vol.Namespace),
		shutdownCtx: parent.ctx,
	}

	// Start the long-lived watcher that scans for allocation updates
	w.Start()
	return w
}

// Notify signals an update to the tracked volume.
func (vw *volumeWatcher) Notify(v *structs.CSIVolume) {
	if !vw.isRunning() {
		vw.Start()
	}
	select {
	case vw.updateCh <- v:
	case <-vw.shutdownCtx.Done(): // prevent deadlock if we stopped
	case <-vw.ctx.Done(): // prevent deadlock if we stopped
	}
}

// Start runs the watch loop in a new goroutine.
func (vw *volumeWatcher) Start() {
	vw.logger.Trace("starting watcher")
	vw.wLock.Lock()
	defer vw.wLock.Unlock()
	vw.running = true
	ctx, exitFn := context.WithCancel(vw.shutdownCtx)
	vw.ctx = ctx
	vw.exitFn = exitFn
	go vw.watch()
}

// Stop stops watching the volume. This should be called whenever a
// volume's claims are fully reaped or the watcher is no longer needed.
func (vw *volumeWatcher) Stop() {
	vw.logger.Trace("no more claims")
	vw.exitFn()
}

// isRunning reports whether the watch loop is active and neither the
// watcher's own context nor the server's shutdown context is done.
func (vw *volumeWatcher) isRunning() bool {
	vw.wLock.RLock()
	defer vw.wLock.RUnlock()
	select {
	case <-vw.shutdownCtx.Done():
		return false
	case <-vw.ctx.Done():
		return false
	default:
		return vw.running
	}
}

// watch is the long-running function that watches for changes to a volume.
// Each pass steps the volume's claims through the various states of reaping
// until the volume has no more claims eligible to be reaped.
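// A pass with no pending update on updateCh falls through to the select's
// default case, which stops the watcher; Notify restarts it when the next
// update for this volume arrives.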
func (vw *volumeWatcher) watch() {
	for {
		select {
		// TODO(tgross): currently server->client RPCs have no cancellation
		// context, so we can't stop the long-running RPCs gracefully
		case <-vw.shutdownCtx.Done():
			return
		case <-vw.ctx.Done():
			return
		case vol := <-vw.updateCh:
			// while we won't make raft writes if we get a stale update,
			// we can still fire extra CSI RPC calls if we don't check this
			if vol.ModifyIndex >= vw.v.ModifyIndex {
				vol = vw.getVolume(vol)
				if vol == nil {
					return
				}
				vw.volumeReap(vol)
			}
		default:
			vw.Stop() // no pending work
			return
		}
	}
}

// getVolume returns the tracked volume, fully populated with the current
// state.
func (vw *volumeWatcher) getVolume(vol *structs.CSIVolume) *structs.CSIVolume {
	vw.wLock.RLock()
	defer vw.wLock.RUnlock()

	var err error
	ws := memdb.NewWatchSet()

	vol, err = vw.state.CSIVolumeDenormalizePlugins(ws, vol.Copy())
	if err != nil {
		vw.logger.Error("could not query plugins for volume", "error", err)
		return nil
	}

	vol, err = vw.state.CSIVolumeDenormalize(ws, vol)
	if err != nil {
		vw.logger.Error("could not query allocs for volume", "error", err)
		return nil
	}
	vw.v = vol
	return vol
}

// volumeReap collects errors for logging but doesn't return them
// to the main loop.
func (vw *volumeWatcher) volumeReap(vol *structs.CSIVolume) {
	vw.logger.Trace("releasing unused volume claims")
	err := vw.volumeReapImpl(vol)
	if err != nil {
		vw.logger.Error("error releasing volume claims", "error", err)
	}
	if vw.isUnclaimed(vol) {
		vw.Stop()
	}
}

// isUnclaimed reports whether the volume has no read, write, or past
// claims left to reap.
func (vw *volumeWatcher) isUnclaimed(vol *structs.CSIVolume) bool {
	return len(vol.ReadClaims) == 0 && len(vol.WriteClaims) == 0 && len(vol.PastClaims) == 0
}

func (vw *volumeWatcher) volumeReapImpl(vol *structs.CSIVolume) error {

	// PastClaims written by a volume GC core job will have no allocation,
	// so we need to find out which allocs are eligible for cleanup.
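	// collectPastClaims backfills those claims by scanning the volume's
	// read and write allocations for missing or terminal allocs.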
	for _, claim := range vol.PastClaims {
		if claim.AllocationID == "" {
			vol = vw.collectPastClaims(vol)
			break // only need to collect once
		}
	}

	var result *multierror.Error
	for _, claim := range vol.PastClaims {
		err := vw.unpublish(vol, claim)
		if err != nil {
			result = multierror.Append(result, err)
		}
	}

	return result.ErrorOrNil()
}

// collectPastClaims adds a PastClaim for each of the volume's read and
// write allocations that is missing from state or has terminated.
func (vw *volumeWatcher) collectPastClaims(vol *structs.CSIVolume) *structs.CSIVolume {

	collect := func(allocs map[string]*structs.Allocation,
		claims map[string]*structs.CSIVolumeClaim) {

		for allocID, alloc := range allocs {
			if alloc == nil {
				_, exists := vol.PastClaims[allocID]
				if !exists {
					vol.PastClaims[allocID] = &structs.CSIVolumeClaim{
						AllocationID: allocID,
						State:        structs.CSIVolumeClaimStateReadyToFree,
					}
				}
			} else if alloc.Terminated() {
				// don't overwrite the PastClaim if we've seen it before,
				// so that we can track state between subsequent calls
				_, exists := vol.PastClaims[allocID]
				if !exists {
					claim, ok := claims[allocID]
					if !ok {
						claim = &structs.CSIVolumeClaim{
							AllocationID: allocID,
							NodeID:       alloc.NodeID,
						}
					}
					claim.State = structs.CSIVolumeClaimStateTaken
					vol.PastClaims[allocID] = claim
				}
			}
		}
	}

	collect(vol.ReadAllocs, vol.ReadClaims)
	collect(vol.WriteAllocs, vol.WriteClaims)
	return vol
}

// unpublish sends an Unpublish RPC for the claim and, on success, marks
// the claim as ready to free.
func (vw *volumeWatcher) unpublish(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {
	req := &structs.CSIVolumeUnpublishRequest{
		VolumeID: vol.ID,
		Claim:    claim,
		WriteRequest: structs.WriteRequest{
			Namespace: vol.Namespace,
			Region:    vw.state.Config().Region,
			AuthToken: vw.leaderAcl,
		},
	}
	err := vw.rpc.Unpublish(req, &structs.CSIVolumeUnpublishResponse{})
	if err != nil {
		return err
	}
	claim.State = structs.CSIVolumeClaimStateReadyToFree
	return nil
}
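
For context, here is a minimal sketch of how the parent Watcher might route an updated volume to its per-volume watcher. The getOrCreateWatcher helper and the watchers/wlock fields are assumptions for illustration; they are not part of this file.

	// Hypothetical sketch, not part of this file: the watchers map, the
	// wlock mutex, and this helper are assumed for illustration.
	func (w *Watcher) getOrCreateWatcher(vol *structs.CSIVolume) {
		w.wlock.Lock()
		defer w.wlock.Unlock()

		// key by ID and namespace so same-ID volumes in different
		// namespaces get separate watchers
		key := vol.ID + vol.Namespace
		watcher, ok := w.watchers[key]
		if !ok {
			// newVolumeWatcher starts the watch goroutine immediately
			watcher = newVolumeWatcher(w, vol)
			w.watchers[key] = watcher
		}

		// Notify restarts the watcher if it already drained its work
		// and stopped itself, then hands it the updated volume
		watcher.Notify(vol)
	}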