github.com/mattermost/mattermost-plugin-api@v0.1.4/cluster/job_once.go (about) 1 package cluster 2 3 import ( 4 "encoding/json" 5 "math/rand" 6 "sync" 7 "time" 8 9 "github.com/mattermost/mattermost-server/v6/model" 10 "github.com/pkg/errors" 11 ) 12 13 const ( 14 // oncePrefix is used to namespace key values created for a scheduleOnce job 15 oncePrefix = "once_" 16 17 // keysPerPage is the maximum number of keys to retrieve from the db per call 18 keysPerPage = 1000 19 20 // maxNumFails is the maximum number of KVStore read fails or failed attempts to run the 21 // callback until the scheduler cancels a job. 22 maxNumFails = 3 23 24 // waitAfterFail is the amount of time to wait after a failure 25 waitAfterFail = 1 * time.Second 26 27 // pollNewJobsInterval is the amount of time to wait between polling the db for new scheduled jobs 28 pollNewJobsInterval = 5 * time.Minute 29 30 // scheduleOnceJitter is the range of jitter to add to intervals to avoid contention issues 31 scheduleOnceJitter = 100 * time.Millisecond 32 33 // propsLimit is the maximum length in bytes of the json-representation of a job's props. 34 // It exists to prevent job go rountines from consuming too much memory, as they are long running. 35 propsLimit = 10000 36 ) 37 38 type JobOnceMetadata struct { 39 Key string 40 RunAt time.Time 41 Props any 42 } 43 44 type JobOnce struct { 45 pluginAPI JobPluginAPI 46 clusterMutex *Mutex 47 48 // key is the original key. It is prefixed with oncePrefix when used as a key in the KVStore 49 key string 50 props any 51 runAt time.Time 52 numFails int 53 54 // done signals the job.run go routine to exit 55 done chan bool 56 doneOnce sync.Once 57 58 // join is a join point for the job.run() goroutine to join the calling goroutine (in this case, 59 // the one calling job.Cancel) 60 join chan bool 61 joinOnce sync.Once 62 63 storedCallback *syncedCallback 64 activeJobs *syncedJobs 65 } 66 67 // Cancel terminates a scheduled job, preventing it from being scheduled on this plugin instance. 68 // It also removes the job from the db, preventing it from being run in the future. 69 func (j *JobOnce) Cancel() { 70 j.clusterMutex.Lock() 71 defer j.clusterMutex.Unlock() 72 73 j.cancelWhileHoldingMutex() 74 75 // join the running goroutine 76 j.joinOnce.Do(func() { 77 <-j.join 78 }) 79 } 80 81 func newJobOnce(pluginAPI JobPluginAPI, key string, runAt time.Time, callback *syncedCallback, jobs *syncedJobs, props any) (*JobOnce, error) { 82 mutex, err := NewMutex(pluginAPI, key) 83 if err != nil { 84 return nil, errors.Wrap(err, "failed to create job mutex") 85 } 86 87 propsBytes, err := json.Marshal(props) 88 if err != nil { 89 return nil, errors.Wrap(err, "failed to marshal props") 90 } 91 92 if len(propsBytes) > propsLimit { 93 return nil, errors.Errorf("props length extends limit") 94 } 95 96 return &JobOnce{ 97 pluginAPI: pluginAPI, 98 clusterMutex: mutex, 99 key: key, 100 props: props, 101 runAt: runAt, 102 done: make(chan bool), 103 join: make(chan bool), 104 storedCallback: callback, 105 activeJobs: jobs, 106 }, nil 107 } 108 109 func (j *JobOnce) run() { 110 defer close(j.join) 111 112 wait := time.Until(j.runAt) 113 114 for { 115 select { 116 case <-j.done: 117 return 118 case <-time.After(wait + addJitter()): 119 } 120 121 func() { 122 // Acquire the cluster mutex while we're trying to do the job 123 j.clusterMutex.Lock() 124 defer j.clusterMutex.Unlock() 125 126 // Check that the job has not been completed 127 metadata, err := readMetadata(j.pluginAPI, j.key) 128 if err != nil { 129 j.numFails++ 130 if j.numFails > maxNumFails { 131 j.cancelWhileHoldingMutex() 132 return 133 } 134 135 // wait a bit of time and try again 136 wait = waitAfterFail 137 return 138 } 139 140 // If key doesn't exist, or if the runAt has changed, the original job has been completed already 141 if metadata == nil || !j.runAt.Equal(metadata.RunAt) { 142 j.cancelWhileHoldingMutex() 143 return 144 } 145 146 j.executeJob() 147 148 j.cancelWhileHoldingMutex() 149 }() 150 } 151 } 152 153 func (j *JobOnce) executeJob() { 154 j.storedCallback.mu.Lock() 155 defer j.storedCallback.mu.Unlock() 156 157 j.storedCallback.callback(j.key, j.props) 158 } 159 160 // readMetadata reads the job's stored metadata. If the caller wishes to make an atomic 161 // read/write, the cluster mutex for job's key should be held. 162 func readMetadata(pluginAPI JobPluginAPI, key string) (*JobOnceMetadata, error) { 163 data, appErr := pluginAPI.KVGet(oncePrefix + key) 164 if appErr != nil { 165 return nil, errors.Wrap(normalizeAppErr(appErr), "failed to read data") 166 } 167 168 if data == nil { 169 return nil, nil 170 } 171 172 var metadata JobOnceMetadata 173 if err := json.Unmarshal(data, &metadata); err != nil { 174 return nil, errors.Wrap(err, "failed to decode data") 175 } 176 177 return &metadata, nil 178 } 179 180 // saveMetadata writes the job's metadata to the kvstore. saveMetadata acquires the job's cluster lock. 181 // saveMetadata will not overwrite an existing key. 182 func (j *JobOnce) saveMetadata() error { 183 j.clusterMutex.Lock() 184 defer j.clusterMutex.Unlock() 185 186 metadata := JobOnceMetadata{ 187 Key: j.key, 188 Props: j.props, 189 RunAt: j.runAt, 190 } 191 data, err := json.Marshal(metadata) 192 if err != nil { 193 return errors.Wrap(err, "failed to marshal data") 194 } 195 196 ok, appErr := j.pluginAPI.KVSetWithOptions(oncePrefix+j.key, data, model.PluginKVSetOptions{ 197 Atomic: true, 198 OldValue: nil, 199 }) 200 if appErr != nil { 201 return normalizeAppErr(appErr) 202 } 203 if !ok { 204 return errors.New("failed to set data") 205 } 206 207 return nil 208 } 209 210 // cancelWhileHoldingMutex assumes the caller holds the job's mutex. 211 func (j *JobOnce) cancelWhileHoldingMutex() { 212 // remove the job from the kv store, if it exists 213 _ = j.pluginAPI.KVDelete(oncePrefix + j.key) 214 215 j.activeJobs.mu.Lock() 216 defer j.activeJobs.mu.Unlock() 217 delete(j.activeJobs.jobs, j.key) 218 219 j.doneOnce.Do(func() { 220 close(j.done) 221 }) 222 } 223 224 func addJitter() time.Duration { 225 return time.Duration(rand.Int63n(int64(scheduleOnceJitter))) 226 } 227 228 func normalizeAppErr(appErr *model.AppError) error { 229 if appErr == nil { 230 return nil 231 } 232 233 return appErr 234 }