go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/gae/filter/txnBuf/state.go (about) 1 // Copyright 2015 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package txnBuf 16 17 import ( 18 "bytes" 19 "context" 20 "sync" 21 22 "go.chromium.org/luci/common/data/stringset" 23 "go.chromium.org/luci/common/errors" 24 "go.chromium.org/luci/common/sync/parallel" 25 26 "go.chromium.org/luci/gae/impl/memory" 27 "go.chromium.org/luci/gae/service/datastore" 28 "go.chromium.org/luci/gae/service/info" 29 ) 30 31 // DefaultSizeBudget is the size budget for the root transaction. 32 // 33 // Because our estimation algorithm isn't entirely correct, we take 5% off 34 // the limit for encoding and estimate inaccuracies. 35 // 36 // 10MB taken on 2015/09/24: 37 // https://cloud.google.com/appengine/docs/go/datastore/#Go_Quotas_and_limits 38 const DefaultSizeBudget = int64((10 * 1000 * 1000) * 0.95) 39 40 // DefaultWriteCountBudget is the maximum number of entities that can be written 41 // in a single call. 42 // 43 // This is not known to be documented, and has instead been extracted from a 44 // datastore error message. 45 const DefaultWriteCountBudget = 500 46 47 // sizeTracker tracks the size of a buffered transaction. The rules are simple: 48 // - deletes count for the size of their key, but 0 data 49 // - puts count for the size of their key plus the 'EstimateSize' for their 50 // data. 51 type sizeTracker struct { 52 keyToSize map[string]int64 53 total int64 54 } 55 56 // set states that the given key is being set to an entity with the size `val`. 57 // A val of 0 means "I'm deleting this key" 58 func (s *sizeTracker) set(key string, val int64) { 59 if s.keyToSize == nil { 60 s.keyToSize = make(map[string]int64) 61 } 62 prev, existed := s.keyToSize[key] 63 s.keyToSize[key] = val 64 s.total += val - prev 65 if !existed { 66 s.total += int64(len(key)) 67 } 68 } 69 70 // get returns the currently tracked size for key, and wheter or not the key 71 // has any tracked value. 72 func (s *sizeTracker) get(key string) (int64, bool) { 73 size, has := s.keyToSize[key] 74 return size, has 75 } 76 77 // has returns true iff key has a tracked value. 78 func (s *sizeTracker) has(key string) bool { 79 _, has := s.keyToSize[key] 80 return has 81 } 82 83 // numWrites returns the number of tracked write operations. 84 func (s *sizeTracker) numWrites() int { 85 return len(s.keyToSize) 86 } 87 88 // dup returns a duplicate sizeTracker. 89 func (s *sizeTracker) dup() *sizeTracker { 90 if len(s.keyToSize) == 0 { 91 return &sizeTracker{} 92 } 93 k2s := make(map[string]int64, len(s.keyToSize)) 94 for k, v := range s.keyToSize { 95 k2s[k] = v 96 } 97 return &sizeTracker{k2s, s.total} 98 } 99 100 type txnBufState struct { 101 sync.Mutex 102 103 // encoded key -> size of entity. A size of 0 means that the entity is 104 // deleted. 105 entState *sizeTracker 106 bufDS datastore.RawInterface 107 108 roots stringset.Set 109 110 kc datastore.KeyContext 111 parentDS datastore.RawInterface 112 113 // sizeBudget is the number of bytes that this transaction has to operate 114 // within. It's only used when attempting to apply() the transaction, and 115 // it is the threshold for the delta of applying this transaction to the 116 // parent transaction. Note that a buffered transaction could actually have 117 // a negative delta if the parent transaction had many large entities which 118 // the inner transaction deleted. 119 sizeBudget int64 120 // countBudget is the number of entity writes that this transaction has to 121 // operate in. 122 writeCountBudget int 123 } 124 125 func withTxnBuf(ctx context.Context, cb func(context.Context) error, opts *datastore.TransactionOptions) error { 126 parentState, _ := ctx.Value(&dsTxnBufParent).(*txnBufState) 127 roots := stringset.New(0) 128 sizeBudget, writeCountBudget := DefaultSizeBudget, DefaultWriteCountBudget 129 if parentState != nil { 130 roots = parentState.roots.Dup() 131 132 sizeBudget = parentState.sizeBudget - parentState.entState.total 133 writeCountBudget = parentState.writeCountBudget - parentState.entState.numWrites() 134 } 135 136 state := &txnBufState{ 137 entState: &sizeTracker{}, 138 bufDS: memory.NewDatastore(ctx, info.Raw(ctx)), 139 roots: roots, 140 kc: datastore.GetKeyContext(ctx), 141 parentDS: datastore.Raw(context.WithValue(ctx, &dsTxnBufHaveLock, true)), 142 sizeBudget: sizeBudget, 143 writeCountBudget: writeCountBudget, 144 } 145 if err := cb(context.WithValue(ctx, &dsTxnBufParent, state)); err != nil { 146 return err 147 } 148 149 // no reason to unlock this ever. At this point it's toast. 150 state.Lock() 151 152 if parentState == nil { 153 return commitToReal(state) 154 } 155 156 if err := parentState.canApplyLocked(state); err != nil { 157 return err 158 } 159 160 parentState.commitLocked(state) 161 return nil 162 } 163 164 // item is a temporary object for representing key/entity pairs and their cache 165 // state (e.g. if they exist in the in-memory datastore buffer or not). 166 // Additionally item memoizes some common comparison strings. item objects 167 // must never be persisted outside of a single function/query context. 168 type item struct { 169 key *datastore.Key 170 data datastore.PropertyMap 171 buffered bool 172 173 encKey string 174 175 // cmpRow is used to hold the toComparableString value for this item during 176 // a query. 177 cmpRow string 178 179 // err is a bit of a hack for passing back synchronized errors from 180 // queryToIter. 181 err error 182 } 183 184 func (i *item) getEncKey() string { 185 if i.encKey == "" { 186 i.encKey = string(datastore.Serialize.ToBytes(i.key)) 187 } 188 return i.encKey 189 } 190 191 func (i *item) getCmpRow(lower, upper []byte, order []datastore.IndexColumn) string { 192 if i.cmpRow == "" { 193 row, key := toComparableString(lower, upper, order, i.key, i.data) 194 i.cmpRow = string(row) 195 if i.encKey == "" { 196 i.encKey = string(key) 197 } 198 } 199 return i.cmpRow 200 } 201 202 func (t *txnBufState) updateRootsLocked(roots stringset.Set) error { 203 proposedRoots := stringset.New(1) 204 roots.Iter(func(root string) bool { 205 if !t.roots.Has(root) { 206 proposedRoots.Add(root) 207 } 208 return true 209 }) 210 // only need to update the roots if they did something that required updating 211 if proposedRoots.Len() > 0 { 212 proposedRoots.Iter(func(root string) bool { 213 t.roots.Add(root) 214 return true 215 }) 216 } 217 return nil 218 } 219 220 func (t *txnBufState) getMulti(keys []*datastore.Key, metas datastore.MultiMetaGetter, cb datastore.GetMultiCB, haveLock bool) error { 221 encKeys, roots := toEncoded(keys) 222 data := make([]item, len(keys)) 223 224 idxMap := []int(nil) 225 toGetKeys := []*datastore.Key(nil) 226 227 lme := errors.NewLazyMultiError(len(keys)) 228 err := func() error { 229 if !haveLock { 230 t.Lock() 231 defer t.Unlock() 232 } 233 234 if err := t.updateRootsLocked(roots); err != nil { 235 return err 236 } 237 238 for i, key := range keys { 239 data[i].key = key 240 data[i].encKey = encKeys[i] 241 if size, ok := t.entState.get(data[i].getEncKey()); ok { 242 data[i].buffered = true 243 if size > 0 { 244 idxMap = append(idxMap, i) 245 toGetKeys = append(toGetKeys, key) 246 } 247 } 248 } 249 250 if len(toGetKeys) > 0 { 251 t.bufDS.GetMulti(toGetKeys, nil, func(j int, pm datastore.PropertyMap, err error) { 252 impossible(err) 253 data[idxMap[j]].data = pm 254 }) 255 } 256 257 idxMap = nil 258 getKeys := []*datastore.Key(nil) 259 getMetas := datastore.MultiMetaGetter(nil) 260 261 for i, itm := range data { 262 if !itm.buffered { 263 idxMap = append(idxMap, i) 264 getKeys = append(getKeys, itm.key) 265 getMetas = append(getMetas, metas.GetSingle(i)) 266 } 267 } 268 269 if len(idxMap) > 0 { 270 err := t.parentDS.GetMulti(getKeys, getMetas, func(j int, pm datastore.PropertyMap, err error) { 271 if err != datastore.ErrNoSuchEntity { 272 i := idxMap[j] 273 if !lme.Assign(i, err) { 274 data[i].data = pm 275 } 276 } 277 }) 278 if err != nil { 279 return err 280 } 281 } 282 return nil 283 }() 284 if err != nil { 285 return err 286 } 287 288 for i, itm := range data { 289 err := lme.GetOne(i) 290 if err != nil { 291 cb(i, nil, err) 292 } else if itm.data == nil { 293 cb(i, nil, datastore.ErrNoSuchEntity) 294 } else { 295 cb(i, itm.data, nil) 296 } 297 } 298 return nil 299 } 300 301 func (t *txnBufState) deleteMulti(keys []*datastore.Key, cb datastore.DeleteMultiCB, haveLock bool) error { 302 encKeys, roots := toEncoded(keys) 303 304 err := func() error { 305 if !haveLock { 306 t.Lock() 307 defer t.Unlock() 308 } 309 310 if err := t.updateRootsLocked(roots); err != nil { 311 return err 312 } 313 314 err := t.bufDS.DeleteMulti(keys, func(i int, err error) { 315 impossible(err) 316 t.entState.set(encKeys[i], 0) 317 }) 318 impossible(err) 319 return nil 320 }() 321 if err != nil { 322 return err 323 } 324 325 for i := range keys { 326 cb(i, nil) 327 } 328 329 return nil 330 } 331 332 func (t *txnBufState) fixKeys(keys []*datastore.Key) ([]*datastore.Key, error) { 333 // Identify any incomplete keys and allocate IDs for them. 334 // 335 // In order to facilitate this, we will maintain a mapping of the 336 // incompleteKeys index to the key's corresponding index in the keys array. 337 // Any errors or allocations on incompleteKeys operations will be propagated 338 // to the correct keys index using this map. 339 var ( 340 incompleteKeys []*datastore.Key 341 incompleteMap map[int]int 342 ) 343 344 for i, key := range keys { 345 if key.IsIncomplete() { 346 if incompleteMap == nil { 347 incompleteMap = make(map[int]int) 348 } 349 incompleteMap[len(incompleteKeys)] = i 350 incompleteKeys = append(incompleteKeys, key) 351 } 352 } 353 if len(incompleteKeys) == 0 { 354 return keys, nil 355 } 356 357 // We're going to update keys, so clone it. 358 keys, origKeys := make([]*datastore.Key, len(keys)), keys 359 copy(keys, origKeys) 360 361 // Intentionally call AllocateIDs without lock. 362 outerErr := errors.NewLazyMultiError(len(keys)) 363 err := t.parentDS.AllocateIDs(incompleteKeys, func(i int, key *datastore.Key, err error) { 364 outerIdx := incompleteMap[i] 365 366 if err != nil { 367 outerErr.Assign(outerIdx, err) 368 } else { 369 keys[outerIdx] = key 370 } 371 }) 372 if err != nil { 373 return nil, err 374 } 375 return keys, outerErr.Get() 376 } 377 378 func (t *txnBufState) putMulti(keys []*datastore.Key, vals []datastore.PropertyMap, cb datastore.NewKeyCB, haveLock bool) error { 379 keys, err := t.fixKeys(keys) 380 if err != nil { 381 for i, e := range err.(errors.MultiError) { 382 cb(i, nil, e) 383 } 384 return nil 385 } 386 387 encKeys, roots := toEncoded(keys) 388 389 err = func() error { 390 if !haveLock { 391 t.Lock() 392 defer t.Unlock() 393 } 394 395 if err := t.updateRootsLocked(roots); err != nil { 396 return err 397 } 398 399 err := t.bufDS.PutMulti(keys, vals, func(i int, k *datastore.Key, err error) { 400 impossible(err) 401 t.entState.set(encKeys[i], vals[i].EstimateSize()) 402 }) 403 impossible(err) 404 return nil 405 }() 406 if err != nil { 407 return err 408 } 409 410 for i, k := range keys { 411 cb(i, k, nil) 412 } 413 return nil 414 } 415 416 func commitToReal(s *txnBufState) error { 417 toPut, toPutKeys, toDel := s.effect() 418 419 return parallel.FanOutIn(func(ch chan<- func() error) { 420 if len(toPut) > 0 { 421 ch <- func() error { 422 mErr := errors.NewLazyMultiError(len(toPut)) 423 err := s.parentDS.PutMulti(toPutKeys, toPut, func(i int, _ *datastore.Key, err error) { 424 mErr.Assign(i, err) 425 }) 426 if err == nil { 427 err = mErr.Get() 428 } 429 return err 430 } 431 } 432 if len(toDel) > 0 { 433 ch <- func() error { 434 mErr := errors.NewLazyMultiError(len(toDel)) 435 err := s.parentDS.DeleteMulti(toDel, func(i int, err error) { 436 mErr.Assign(i, err) 437 }) 438 if err == nil { 439 err = mErr.Get() 440 } 441 return err 442 } 443 } 444 }) 445 } 446 447 func (t *txnBufState) effect() (toPut []datastore.PropertyMap, toPutKeys, toDel []*datastore.Key) { 448 // TODO(riannucci): preallocate return slices 449 450 // need to pull all items out of the in-memory datastore. Fortunately we have 451 // kindless queries, and we disabled all the special entities, so just 452 // run a kindless query without any filters and it will return all data 453 // currently in bufDS :). 454 fq, err := datastore.NewQuery("").Finalize() 455 impossible(err) 456 457 err = t.bufDS.Run(fq, func(key *datastore.Key, data datastore.PropertyMap, _ datastore.CursorCB) error { 458 toPutKeys = append(toPutKeys, key) 459 toPut = append(toPut, data) 460 return nil 461 }) 462 memoryCorruption(err) 463 464 for keyStr, size := range t.entState.keyToSize { 465 if size == 0 { 466 k, err := datastore.Deserializer{KeyContext: t.kc}.Key(bytes.NewBufferString(keyStr)) 467 memoryCorruption(err) 468 toDel = append(toDel, k) 469 } 470 } 471 472 return 473 } 474 475 func (t *txnBufState) canApplyLocked(s *txnBufState) error { 476 proposedState := t.entState.dup() 477 478 for k, v := range s.entState.keyToSize { 479 proposedState.set(k, v) 480 } 481 switch { 482 case proposedState.numWrites() > t.writeCountBudget: 483 // The new net number of writes must be below the parent's write count 484 // cutoff. 485 fallthrough 486 487 case proposedState.total > t.sizeBudget: 488 // Make sure our new calculated size is within the parent's size budget. 489 // 490 // We have: 491 // - proposedState.total: The "new world" total bytes were this child 492 // transaction committed to the parent. 493 // - t.sizeBudget: The maximum number of bytes that this parent can 494 // accommodate. 495 return ErrTransactionTooLarge 496 } 497 498 return nil 499 } 500 501 func (t *txnBufState) commitLocked(s *txnBufState) { 502 toPut, toPutKeys, toDel := s.effect() 503 504 if len(toPut) > 0 { 505 impossible(t.putMulti(toPutKeys, toPut, 506 func(_ int, _ *datastore.Key, err error) { impossible(err) }, true)) 507 } 508 509 if len(toDel) > 0 { 510 impossible(t.deleteMulti(toDel, 511 func(_ int, err error) { impossible(err) }, true)) 512 } 513 } 514 515 // toEncoded returns a list of all of the serialized versions of these keys, 516 // plus a stringset of all the encoded root keys that `keys` represents. 517 func toEncoded(keys []*datastore.Key) (full []string, roots stringset.Set) { 518 roots = stringset.New(len(keys)) 519 full = make([]string, len(keys)) 520 for i, k := range keys { 521 roots.Add(string(datastore.Serialize.ToBytes(k.Root()))) 522 full[i] = string(datastore.Serialize.ToBytes(k)) 523 } 524 return 525 }