github.com/mattermost/mattermost-plugin-api@v0.1.4/cluster/job_once.go (about)

     1  package cluster
     2  
     3  import (
     4  	"encoding/json"
     5  	"math/rand"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/mattermost/mattermost-server/v6/model"
    10  	"github.com/pkg/errors"
    11  )
    12  
    13  const (
    14  	// oncePrefix is used to namespace key values created for a scheduleOnce job
    15  	oncePrefix = "once_"
    16  
    17  	// keysPerPage is the maximum number of keys to retrieve from the db per call
    18  	keysPerPage = 1000
    19  
    20  	// maxNumFails is the maximum number of KVStore read fails or failed attempts to run the
    21  	// callback until the scheduler cancels a job.
    22  	maxNumFails = 3
    23  
    24  	// waitAfterFail is the amount of time to wait after a failure
    25  	waitAfterFail = 1 * time.Second
    26  
    27  	// pollNewJobsInterval is the amount of time to wait between polling the db for new scheduled jobs
    28  	pollNewJobsInterval = 5 * time.Minute
    29  
    30  	// scheduleOnceJitter is the range of jitter to add to intervals to avoid contention issues
    31  	scheduleOnceJitter = 100 * time.Millisecond
    32  
    33  	// propsLimit is the maximum length in bytes of the json-representation of a job's props.
    34  	// It exists to prevent job go rountines from consuming too much memory, as they are long running.
    35  	propsLimit = 10000
    36  )
    37  
// JobOnceMetadata is the persisted description of a scheduleOnce job. It is JSON-encoded by
// saveMetadata and stored in the KVStore under oncePrefix + Key, and decoded by readMetadata.
//
// Key is the job's original (unprefixed) key, RunAt is the time the job is scheduled to run,
// and Props is the caller-supplied value that is handed to the callback together with the key.
type JobOnceMetadata struct {
	Key   string
	RunAt time.Time
	Props any
}
    43  
// JobOnce is a single scheduled job: a key, the time it should run at, and the props passed to
// the stored callback when it fires. Its run method waits until runAt and then, while holding
// the job's cluster mutex, checks the stored metadata and executes the callback.
type JobOnce struct {
	// pluginAPI is used for the KVStore reads, writes and deletes of the job's metadata.
	// clusterMutex is the mutex created for this job's key; it is held while the job is
	// saved, executed or cancelled.
	pluginAPI    JobPluginAPI
	clusterMutex *Mutex

	// key is the original key. It is prefixed with oncePrefix when used as a key in the KVStore.
	// numFails counts failed metadata reads in run; the job is cancelled once it exceeds maxNumFails.
	key      string
	props    any
	runAt    time.Time
	numFails int

	// done signals the job.run go routine to exit. It is closed at most once, guarded by doneOnce.
	done     chan bool
	doneOnce sync.Once

	// join is a join point for the job.run() goroutine to join the calling goroutine (in this case,
	// the one calling job.Cancel). run closes it on exit; joinOnce guards the wait in Cancel.
	join     chan bool
	joinOnce sync.Once

	// storedCallback holds the callback invoked by executeJob (under its own mutex).
	// activeJobs is the shared job map this job removes itself from when it is cancelled.
	storedCallback *syncedCallback
	activeJobs     *syncedJobs
}
    66  
    67  // Cancel terminates a scheduled job, preventing it from being scheduled on this plugin instance.
    68  // It also removes the job from the db, preventing it from being run in the future.
    69  func (j *JobOnce) Cancel() {
    70  	j.clusterMutex.Lock()
    71  	defer j.clusterMutex.Unlock()
    72  
    73  	j.cancelWhileHoldingMutex()
    74  
    75  	// join the running goroutine
    76  	j.joinOnce.Do(func() {
    77  		<-j.join
    78  	})
    79  }
    80  
    81  func newJobOnce(pluginAPI JobPluginAPI, key string, runAt time.Time, callback *syncedCallback, jobs *syncedJobs, props any) (*JobOnce, error) {
    82  	mutex, err := NewMutex(pluginAPI, key)
    83  	if err != nil {
    84  		return nil, errors.Wrap(err, "failed to create job mutex")
    85  	}
    86  
    87  	propsBytes, err := json.Marshal(props)
    88  	if err != nil {
    89  		return nil, errors.Wrap(err, "failed to marshal props")
    90  	}
    91  
    92  	if len(propsBytes) > propsLimit {
    93  		return nil, errors.Errorf("props length extends limit")
    94  	}
    95  
    96  	return &JobOnce{
    97  		pluginAPI:      pluginAPI,
    98  		clusterMutex:   mutex,
    99  		key:            key,
   100  		props:          props,
   101  		runAt:          runAt,
   102  		done:           make(chan bool),
   103  		join:           make(chan bool),
   104  		storedCallback: callback,
   105  		activeJobs:     jobs,
   106  	}, nil
   107  }
   108  
   109  func (j *JobOnce) run() {
   110  	defer close(j.join)
   111  
   112  	wait := time.Until(j.runAt)
   113  
   114  	for {
   115  		select {
   116  		case <-j.done:
   117  			return
   118  		case <-time.After(wait + addJitter()):
   119  		}
   120  
   121  		func() {
   122  			// Acquire the cluster mutex while we're trying to do the job
   123  			j.clusterMutex.Lock()
   124  			defer j.clusterMutex.Unlock()
   125  
   126  			// Check that the job has not been completed
   127  			metadata, err := readMetadata(j.pluginAPI, j.key)
   128  			if err != nil {
   129  				j.numFails++
   130  				if j.numFails > maxNumFails {
   131  					j.cancelWhileHoldingMutex()
   132  					return
   133  				}
   134  
   135  				// wait a bit of time and try again
   136  				wait = waitAfterFail
   137  				return
   138  			}
   139  
   140  			// If key doesn't exist, or if the runAt has changed, the original job has been completed already
   141  			if metadata == nil || !j.runAt.Equal(metadata.RunAt) {
   142  				j.cancelWhileHoldingMutex()
   143  				return
   144  			}
   145  
   146  			j.executeJob()
   147  
   148  			j.cancelWhileHoldingMutex()
   149  		}()
   150  	}
   151  }
   152  
   153  func (j *JobOnce) executeJob() {
   154  	j.storedCallback.mu.Lock()
   155  	defer j.storedCallback.mu.Unlock()
   156  
   157  	j.storedCallback.callback(j.key, j.props)
   158  }
   159  
   160  // readMetadata reads the job's stored metadata. If the caller wishes to make an atomic
   161  // read/write, the cluster mutex for job's key should be held.
   162  func readMetadata(pluginAPI JobPluginAPI, key string) (*JobOnceMetadata, error) {
   163  	data, appErr := pluginAPI.KVGet(oncePrefix + key)
   164  	if appErr != nil {
   165  		return nil, errors.Wrap(normalizeAppErr(appErr), "failed to read data")
   166  	}
   167  
   168  	if data == nil {
   169  		return nil, nil
   170  	}
   171  
   172  	var metadata JobOnceMetadata
   173  	if err := json.Unmarshal(data, &metadata); err != nil {
   174  		return nil, errors.Wrap(err, "failed to decode data")
   175  	}
   176  
   177  	return &metadata, nil
   178  }
   179  
   180  // saveMetadata writes the job's metadata to the kvstore. saveMetadata acquires the job's cluster lock.
   181  // saveMetadata will not overwrite an existing key.
   182  func (j *JobOnce) saveMetadata() error {
   183  	j.clusterMutex.Lock()
   184  	defer j.clusterMutex.Unlock()
   185  
   186  	metadata := JobOnceMetadata{
   187  		Key:   j.key,
   188  		Props: j.props,
   189  		RunAt: j.runAt,
   190  	}
   191  	data, err := json.Marshal(metadata)
   192  	if err != nil {
   193  		return errors.Wrap(err, "failed to marshal data")
   194  	}
   195  
   196  	ok, appErr := j.pluginAPI.KVSetWithOptions(oncePrefix+j.key, data, model.PluginKVSetOptions{
   197  		Atomic:   true,
   198  		OldValue: nil,
   199  	})
   200  	if appErr != nil {
   201  		return normalizeAppErr(appErr)
   202  	}
   203  	if !ok {
   204  		return errors.New("failed to set data")
   205  	}
   206  
   207  	return nil
   208  }
   209  
   210  // cancelWhileHoldingMutex assumes the caller holds the job's mutex.
   211  func (j *JobOnce) cancelWhileHoldingMutex() {
   212  	// remove the job from the kv store, if it exists
   213  	_ = j.pluginAPI.KVDelete(oncePrefix + j.key)
   214  
   215  	j.activeJobs.mu.Lock()
   216  	defer j.activeJobs.mu.Unlock()
   217  	delete(j.activeJobs.jobs, j.key)
   218  
   219  	j.doneOnce.Do(func() {
   220  		close(j.done)
   221  	})
   222  }
   223  
   224  func addJitter() time.Duration {
   225  	return time.Duration(rand.Int63n(int64(scheduleOnceJitter)))
   226  }
   227  
   228  func normalizeAppErr(appErr *model.AppError) error {
   229  	if appErr == nil {
   230  		return nil
   231  	}
   232  
   233  	return appErr
   234  }