go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/scheduler/appengine/engine/utils.go (about) 1 // Copyright 2017 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package engine 16 17 import ( 18 "context" 19 "fmt" 20 "sort" 21 "sync" 22 "time" 23 24 "google.golang.org/protobuf/proto" 25 26 "go.chromium.org/luci/gae/service/datastore" 27 "go.chromium.org/luci/gae/service/memcache" 28 29 "go.chromium.org/luci/common/clock" 30 "go.chromium.org/luci/common/errors" 31 "go.chromium.org/luci/common/logging" 32 "go.chromium.org/luci/common/retry/transient" 33 34 "go.chromium.org/luci/scheduler/appengine/internal" 35 ) 36 37 // assertInTransaction panics if the context is not transactional. 38 func assertInTransaction(c context.Context) { 39 if datastore.CurrentTransaction(c) == nil { 40 panic("expecting to be called from inside a transaction") 41 } 42 } 43 44 // assertNotInTransaction panics if the context is transactional. 45 func assertNotInTransaction(c context.Context) { 46 if datastore.CurrentTransaction(c) != nil { 47 panic("expecting to be called from outside transactions") 48 } 49 } 50 51 // debugLog mutates a string by appending a line to it. 52 func debugLog(c context.Context, str *string, format string, args ...any) { 53 prefix := clock.Now(c).UTC().Format("[15:04:05.000] ") 54 *str += prefix + fmt.Sprintf(format+"\n", args...) 55 } 56 57 // defaultTransactionOptions is used for all transactions. 58 // 59 // Almost all transactions done by the scheduler service happen in background 60 // task queues, it is fine to retry more there. 61 var defaultTransactionOptions = datastore.TransactionOptions{ 62 Attempts: 10, 63 } 64 65 // abortTransaction makes the error abort the transaction (even if it is marked 66 // as transient). 67 // 68 // See runTxn for more info. This is used primarily by errUpdateConflict. 69 var abortTransaction = errors.BoolTag{Key: errors.NewTagKey("this error aborts the transaction")} 70 71 // runTxn runs a datastore transaction retrying the body on transient errors or 72 // when encountering a commit conflict. 73 // 74 // It will NOT retry errors (even if transient) marked with abortTransaction 75 // tag. This is primarily used to tag errors that are transient at a level 76 // higher than the transaction: errors marked with both transient.Tag and 77 // abortTransaction are not retried by runTxn, but may be retried by something 78 // on top (like Task Queue). 79 func runTxn(c context.Context, cb func(context.Context) error) error { 80 var attempt int 81 var innerErr error 82 83 err := datastore.RunInTransaction(c, func(c context.Context) error { 84 attempt++ 85 if attempt != 1 { 86 if innerErr != nil { 87 logging.Warningf(c, "Retrying the transaction after the error: %s", innerErr) 88 } else { 89 logging.Warningf(c, "Retrying the transaction: failed to commit") 90 } 91 } 92 innerErr = cb(c) 93 if transient.Tag.In(innerErr) && !abortTransaction.In(innerErr) { 94 return datastore.ErrConcurrentTransaction // causes a retry 95 } 96 return innerErr 97 }, &defaultTransactionOptions) 98 99 if err != nil { 100 logging.WithError(err).Errorf(c, "Transaction failed") 101 if innerErr != nil { 102 return innerErr 103 } 104 // Here it can only be a commit error (i.e. produced by RunInTransaction 105 // itself, not by its callback). We treat them as transient. 106 return transient.Tag.Apply(err) 107 } 108 109 return nil 110 } 111 112 // runIsolatedTxn is like runTxn, except it executes the callback in a new 113 // isolated transaction (even if the original context is already transactional). 114 func runIsolatedTxn(c context.Context, cb func(context.Context) error) error { 115 return runTxn(datastore.WithoutTransaction(c), cb) 116 } 117 118 // equalSortedLists returns true if lists contain the same sequence of strings. 119 func equalSortedLists(a, b []string) bool { 120 if len(a) != len(b) { 121 return false 122 } 123 for i, s := range a { 124 if s != b[i] { 125 return false 126 } 127 } 128 return true 129 } 130 131 // equalInt64Lists returns true if two lists of int64 are equal. 132 // 133 // Order is important. 134 func equalInt64Lists(a, b []int64) bool { 135 if len(a) != len(b) { 136 return false 137 } 138 for i, s := range a { 139 if s != b[i] { 140 return false 141 } 142 } 143 return true 144 } 145 146 // marshalTriggersList serializes list of triggers. 147 // 148 // Panics on errors. 149 func marshalTriggersList(t []*internal.Trigger) []byte { 150 if len(t) == 0 { 151 return nil 152 } 153 blob, err := proto.Marshal(&internal.TriggerList{Triggers: t}) 154 if err != nil { 155 panic(err) 156 } 157 return blob 158 } 159 160 // unmarshalTriggersList deserializes list of triggers. 161 func unmarshalTriggersList(blob []byte) ([]*internal.Trigger, error) { 162 if len(blob) == 0 { 163 return nil, nil 164 } 165 list := internal.TriggerList{} 166 if err := proto.Unmarshal(blob, &list); err != nil { 167 return nil, err 168 } 169 return list.Triggers, nil 170 } 171 172 // mutateTriggersList deserializes the list, calls a callback, which modifies 173 // the list and serializes it back. 174 func mutateTriggersList(blob *[]byte, cb func(*[]*internal.Trigger)) error { 175 list, err := unmarshalTriggersList(*blob) 176 if err != nil { 177 return err 178 } 179 cb(&list) 180 *blob = marshalTriggersList(list) 181 return nil 182 } 183 184 // sortTriggers sorts the triggers by time, most recent last. 185 func sortTriggers(t []*internal.Trigger) { 186 sort.Slice(t, func(i, j int) bool { return isTriggerOlder(t[i], t[j]) }) 187 } 188 189 // isTriggerOlder returns true if t1 is older than t2. 190 // 191 // Compares IDs in case of a tie. 192 func isTriggerOlder(t1, t2 *internal.Trigger) bool { 193 ts1 := t1.Created.AsTime() 194 ts2 := t2.Created.AsTime() 195 switch { 196 case ts1.After(ts2): 197 return false 198 case ts2.After(ts1): 199 return true 200 default: // equal timestamps 201 if t1.OrderInBatch != t2.OrderInBatch { 202 return t1.OrderInBatch < t2.OrderInBatch 203 } 204 return t1.Id < t2.Id 205 } 206 } 207 208 // marshalTimersList serializes list of timers. 209 // 210 // Panics on errors. 211 func marshalTimersList(t []*internal.Timer) []byte { 212 if len(t) == 0 { 213 return nil 214 } 215 blob, err := proto.Marshal(&internal.TimerList{Timers: t}) 216 if err != nil { 217 panic(err) 218 } 219 return blob 220 } 221 222 // unmarshalTimersList deserializes list of timers. 223 func unmarshalTimersList(blob []byte) ([]*internal.Timer, error) { 224 if len(blob) == 0 { 225 return nil, nil 226 } 227 list := internal.TimerList{} 228 if err := proto.Unmarshal(blob, &list); err != nil { 229 return nil, err 230 } 231 return list.Timers, nil 232 } 233 234 // mutateTimersList deserializes the list, calls a callback, which modifies 235 // the list and serializes it back. 236 func mutateTimersList(blob *[]byte, cb func(*[]*internal.Timer)) error { 237 list, err := unmarshalTimersList(*blob) 238 if err != nil { 239 return err 240 } 241 cb(&list) 242 *blob = marshalTimersList(list) 243 return nil 244 } 245 246 // marshalFinishedInvs marshals list of invocations into FinishedInvocationList. 247 // 248 // Panics on errors. 249 func marshalFinishedInvs(invs []*internal.FinishedInvocation) []byte { 250 if len(invs) == 0 { 251 return nil 252 } 253 blob, err := proto.Marshal(&internal.FinishedInvocationList{Invocations: invs}) 254 if err != nil { 255 panic(err) 256 } 257 return blob 258 } 259 260 // unmarshalFinishedInvs unmarshals FinishedInvocationList proto message. 261 func unmarshalFinishedInvs(raw []byte) ([]*internal.FinishedInvocation, error) { 262 if len(raw) == 0 { 263 return nil, nil 264 } 265 invs := internal.FinishedInvocationList{} 266 if err := proto.Unmarshal(raw, &invs); err != nil { 267 return nil, err 268 } 269 return invs.Invocations, nil 270 } 271 272 // filteredFinishedInvocations unmarshals FinishedInvocationList and filters 273 // it to keep only entries whose Finished timestamp is newer than 'oldest'. 274 func filteredFinishedInvs(raw []byte, oldest time.Time) ([]*internal.FinishedInvocation, error) { 275 invs, err := unmarshalFinishedInvs(raw) 276 if err != nil { 277 return nil, err 278 } 279 filtered := make([]*internal.FinishedInvocation, 0, len(invs)) 280 for _, inv := range invs { 281 if inv.Finished.AsTime().After(oldest) { 282 filtered = append(filtered, inv) 283 } 284 } 285 return filtered, nil 286 } 287 288 // opsCache "remembers" recently executed operations, and skips executing them 289 // if they already were done. 290 // 291 // Expected cardinality of a set of all possible actions should be small (we 292 // store the cache in memory). 293 type opsCache struct { 294 lock sync.RWMutex 295 doneFlags map[string]bool 296 } 297 298 // Do calls callback only if it wasn't called before. 299 // 300 // Works on best effort basis: callback can and will be called multiple times 301 // (just not the every time 'Do' is called). 302 // 303 // Keeps "done" flag in local memory and in memcache (using 'key' as 304 // identifier). The callback should be idempotent, since it still may be called 305 // multiple times if multiple processes attempt to execute the action at once. 306 func (o *opsCache) Do(c context.Context, key string, cb func() error) error { 307 // Check the local cache. 308 if o.getFlag(key) { 309 return nil 310 } 311 312 // Check the global cache. 313 switch _, err := memcache.GetKey(c, key); { 314 case err == nil: 315 o.setFlag(key) 316 return nil 317 case err == memcache.ErrCacheMiss: 318 break 319 default: 320 logging.WithError(err).Warningf(c, "opsCache failed to check memcache, will proceed executing op") 321 } 322 323 // Do it. 324 if err := cb(); err != nil { 325 return err 326 } 327 328 // Store in the local cache. 329 o.setFlag(key) 330 331 // Store in the global cache. Ignore errors, it's not a big deal. 332 item := memcache.NewItem(c, key) 333 item.SetValue([]byte("ok")) 334 item.SetExpiration(24 * time.Hour) 335 if err := memcache.Set(c, item); err != nil { 336 logging.WithError(err).Warningf(c, "opsCache failed to write item to memcache") 337 } 338 339 return nil 340 } 341 342 func (o *opsCache) getFlag(key string) bool { 343 o.lock.RLock() 344 defer o.lock.RUnlock() 345 return o.doneFlags[key] 346 } 347 348 func (o *opsCache) setFlag(key string) { 349 o.lock.Lock() 350 defer o.lock.Unlock() 351 if o.doneFlags == nil { 352 o.doneFlags = map[string]bool{} 353 } 354 o.doneFlags[key] = true 355 }