go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/gae/filter/txnBuf/state.go (about)

     1  // Copyright 2015 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package txnBuf
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"sync"
    21  
    22  	"go.chromium.org/luci/common/data/stringset"
    23  	"go.chromium.org/luci/common/errors"
    24  	"go.chromium.org/luci/common/sync/parallel"
    25  
    26  	"go.chromium.org/luci/gae/impl/memory"
    27  	"go.chromium.org/luci/gae/service/datastore"
    28  	"go.chromium.org/luci/gae/service/info"
    29  )
    30  
    31  // DefaultSizeBudget is the size budget for the root transaction.
    32  //
    33  // Because our estimation algorithm isn't entirely correct, we take 5% off
    34  // the limit for encoding and estimate inaccuracies.
    35  //
    36  // 10MB taken on 2015/09/24:
    37  // https://cloud.google.com/appengine/docs/go/datastore/#Go_Quotas_and_limits
    38  const DefaultSizeBudget = int64((10 * 1000 * 1000) * 0.95)
    39  
    40  // DefaultWriteCountBudget is the maximum number of entities that can be written
    41  // in a single call.
    42  //
    43  // This is not known to be documented, and has instead been extracted from a
    44  // datastore error message.
    45  const DefaultWriteCountBudget = 500
    46  
    47  // sizeTracker tracks the size of a buffered transaction. The rules are simple:
    48  //   - deletes count for the size of their key, but 0 data
    49  //   - puts count for the size of their key plus the 'EstimateSize' for their
    50  //     data.
    51  type sizeTracker struct {
    52  	keyToSize map[string]int64
    53  	total     int64
    54  }
    55  
    56  // set states that the given key is being set to an entity with the size `val`.
    57  // A val of 0 means "I'm deleting this key"
    58  func (s *sizeTracker) set(key string, val int64) {
    59  	if s.keyToSize == nil {
    60  		s.keyToSize = make(map[string]int64)
    61  	}
    62  	prev, existed := s.keyToSize[key]
    63  	s.keyToSize[key] = val
    64  	s.total += val - prev
    65  	if !existed {
    66  		s.total += int64(len(key))
    67  	}
    68  }
    69  
    70  // get returns the currently tracked size for key, and wheter or not the key
    71  // has any tracked value.
    72  func (s *sizeTracker) get(key string) (int64, bool) {
    73  	size, has := s.keyToSize[key]
    74  	return size, has
    75  }
    76  
    77  // has returns true iff key has a tracked value.
    78  func (s *sizeTracker) has(key string) bool {
    79  	_, has := s.keyToSize[key]
    80  	return has
    81  }
    82  
    83  // numWrites returns the number of tracked write operations.
    84  func (s *sizeTracker) numWrites() int {
    85  	return len(s.keyToSize)
    86  }
    87  
    88  // dup returns a duplicate sizeTracker.
    89  func (s *sizeTracker) dup() *sizeTracker {
    90  	if len(s.keyToSize) == 0 {
    91  		return &sizeTracker{}
    92  	}
    93  	k2s := make(map[string]int64, len(s.keyToSize))
    94  	for k, v := range s.keyToSize {
    95  		k2s[k] = v
    96  	}
    97  	return &sizeTracker{k2s, s.total}
    98  }
    99  
// txnBufState is the state of one buffered transaction. withTxnBuf creates one
// per transaction and stores it in the context so that nested transactions can
// find their parent.
//
// The embedded Mutex is taken by getMulti/deleteMulti/putMulti unless the
// caller states (via haveLock) that it is already held.
type txnBufState struct {
	sync.Mutex

	// entState tracks encoded key -> size of entity for the writes buffered
	// in bufDS. A size of 0 means that the entity is deleted.
	entState *sizeTracker
	bufDS    datastore.RawInterface

	// roots is the set of encoded root keys seen so far (maintained by
	// updateRootsLocked). A nested transaction starts from a copy of it.
	roots stringset.Set

	// kc is used to deserialize encoded keys back into *datastore.Key.
	// parentDS receives reads that miss the buffer, ID allocation, and the
	// final commit of the root transaction.
	kc       datastore.KeyContext
	parentDS datastore.RawInterface

	// sizeBudget is the number of bytes that this transaction has to operate
	// within. It's only used when attempting to apply() the transaction, and
	// it is the threshold for the delta of applying this transaction to the
	// parent transaction. Note that a buffered transaction could actually have
	// a negative delta if the parent transaction had many large entities which
	// the inner transaction deleted.
	sizeBudget int64
	// writeCountBudget is the number of entity writes that this transaction
	// has to operate in.
	writeCountBudget int
}
   124  
   125  func withTxnBuf(ctx context.Context, cb func(context.Context) error, opts *datastore.TransactionOptions) error {
   126  	parentState, _ := ctx.Value(&dsTxnBufParent).(*txnBufState)
   127  	roots := stringset.New(0)
   128  	sizeBudget, writeCountBudget := DefaultSizeBudget, DefaultWriteCountBudget
   129  	if parentState != nil {
   130  		roots = parentState.roots.Dup()
   131  
   132  		sizeBudget = parentState.sizeBudget - parentState.entState.total
   133  		writeCountBudget = parentState.writeCountBudget - parentState.entState.numWrites()
   134  	}
   135  
   136  	state := &txnBufState{
   137  		entState:         &sizeTracker{},
   138  		bufDS:            memory.NewDatastore(ctx, info.Raw(ctx)),
   139  		roots:            roots,
   140  		kc:               datastore.GetKeyContext(ctx),
   141  		parentDS:         datastore.Raw(context.WithValue(ctx, &dsTxnBufHaveLock, true)),
   142  		sizeBudget:       sizeBudget,
   143  		writeCountBudget: writeCountBudget,
   144  	}
   145  	if err := cb(context.WithValue(ctx, &dsTxnBufParent, state)); err != nil {
   146  		return err
   147  	}
   148  
   149  	// no reason to unlock this ever. At this point it's toast.
   150  	state.Lock()
   151  
   152  	if parentState == nil {
   153  		return commitToReal(state)
   154  	}
   155  
   156  	if err := parentState.canApplyLocked(state); err != nil {
   157  		return err
   158  	}
   159  
   160  	parentState.commitLocked(state)
   161  	return nil
   162  }
   163  
// item is a temporary object for representing key/entity pairs and their cache
// state (e.g. if they exist in the in-memory datastore buffer or not).
// Additionally item memoizes some common comparison strings. item objects
// must never be persisted outside of a single function/query context.
type item struct {
	// key and data are the entity's key and properties. data stays nil when
	// no entity was loaded for key.
	key      *datastore.Key
	data     datastore.PropertyMap
	// buffered is set when the transaction's buffer has a tracked entry for
	// key.
	buffered bool

	// encKey memoizes the serialized form of key (see getEncKey). The empty
	// string means "not computed yet".
	encKey string

	// cmpRow is used to hold the toComparableString value for this item during
	// a query. The empty string means "not computed yet".
	cmpRow string

	// err is a bit of a hack for passing back synchronized errors from
	// queryToIter.
	err error
}
   183  
   184  func (i *item) getEncKey() string {
   185  	if i.encKey == "" {
   186  		i.encKey = string(datastore.Serialize.ToBytes(i.key))
   187  	}
   188  	return i.encKey
   189  }
   190  
   191  func (i *item) getCmpRow(lower, upper []byte, order []datastore.IndexColumn) string {
   192  	if i.cmpRow == "" {
   193  		row, key := toComparableString(lower, upper, order, i.key, i.data)
   194  		i.cmpRow = string(row)
   195  		if i.encKey == "" {
   196  			i.encKey = string(key)
   197  		}
   198  	}
   199  	return i.cmpRow
   200  }
   201  
   202  func (t *txnBufState) updateRootsLocked(roots stringset.Set) error {
   203  	proposedRoots := stringset.New(1)
   204  	roots.Iter(func(root string) bool {
   205  		if !t.roots.Has(root) {
   206  			proposedRoots.Add(root)
   207  		}
   208  		return true
   209  	})
   210  	// only need to update the roots if they did something that required updating
   211  	if proposedRoots.Len() > 0 {
   212  		proposedRoots.Iter(func(root string) bool {
   213  			t.roots.Add(root)
   214  			return true
   215  		})
   216  	}
   217  	return nil
   218  }
   219  
   220  func (t *txnBufState) getMulti(keys []*datastore.Key, metas datastore.MultiMetaGetter, cb datastore.GetMultiCB, haveLock bool) error {
   221  	encKeys, roots := toEncoded(keys)
   222  	data := make([]item, len(keys))
   223  
   224  	idxMap := []int(nil)
   225  	toGetKeys := []*datastore.Key(nil)
   226  
   227  	lme := errors.NewLazyMultiError(len(keys))
   228  	err := func() error {
   229  		if !haveLock {
   230  			t.Lock()
   231  			defer t.Unlock()
   232  		}
   233  
   234  		if err := t.updateRootsLocked(roots); err != nil {
   235  			return err
   236  		}
   237  
   238  		for i, key := range keys {
   239  			data[i].key = key
   240  			data[i].encKey = encKeys[i]
   241  			if size, ok := t.entState.get(data[i].getEncKey()); ok {
   242  				data[i].buffered = true
   243  				if size > 0 {
   244  					idxMap = append(idxMap, i)
   245  					toGetKeys = append(toGetKeys, key)
   246  				}
   247  			}
   248  		}
   249  
   250  		if len(toGetKeys) > 0 {
   251  			t.bufDS.GetMulti(toGetKeys, nil, func(j int, pm datastore.PropertyMap, err error) {
   252  				impossible(err)
   253  				data[idxMap[j]].data = pm
   254  			})
   255  		}
   256  
   257  		idxMap = nil
   258  		getKeys := []*datastore.Key(nil)
   259  		getMetas := datastore.MultiMetaGetter(nil)
   260  
   261  		for i, itm := range data {
   262  			if !itm.buffered {
   263  				idxMap = append(idxMap, i)
   264  				getKeys = append(getKeys, itm.key)
   265  				getMetas = append(getMetas, metas.GetSingle(i))
   266  			}
   267  		}
   268  
   269  		if len(idxMap) > 0 {
   270  			err := t.parentDS.GetMulti(getKeys, getMetas, func(j int, pm datastore.PropertyMap, err error) {
   271  				if err != datastore.ErrNoSuchEntity {
   272  					i := idxMap[j]
   273  					if !lme.Assign(i, err) {
   274  						data[i].data = pm
   275  					}
   276  				}
   277  			})
   278  			if err != nil {
   279  				return err
   280  			}
   281  		}
   282  		return nil
   283  	}()
   284  	if err != nil {
   285  		return err
   286  	}
   287  
   288  	for i, itm := range data {
   289  		err := lme.GetOne(i)
   290  		if err != nil {
   291  			cb(i, nil, err)
   292  		} else if itm.data == nil {
   293  			cb(i, nil, datastore.ErrNoSuchEntity)
   294  		} else {
   295  			cb(i, itm.data, nil)
   296  		}
   297  	}
   298  	return nil
   299  }
   300  
   301  func (t *txnBufState) deleteMulti(keys []*datastore.Key, cb datastore.DeleteMultiCB, haveLock bool) error {
   302  	encKeys, roots := toEncoded(keys)
   303  
   304  	err := func() error {
   305  		if !haveLock {
   306  			t.Lock()
   307  			defer t.Unlock()
   308  		}
   309  
   310  		if err := t.updateRootsLocked(roots); err != nil {
   311  			return err
   312  		}
   313  
   314  		err := t.bufDS.DeleteMulti(keys, func(i int, err error) {
   315  			impossible(err)
   316  			t.entState.set(encKeys[i], 0)
   317  		})
   318  		impossible(err)
   319  		return nil
   320  	}()
   321  	if err != nil {
   322  		return err
   323  	}
   324  
   325  	for i := range keys {
   326  		cb(i, nil)
   327  	}
   328  
   329  	return nil
   330  }
   331  
   332  func (t *txnBufState) fixKeys(keys []*datastore.Key) ([]*datastore.Key, error) {
   333  	// Identify any incomplete keys and allocate IDs for them.
   334  	//
   335  	// In order to facilitate this, we will maintain a mapping of the
   336  	// incompleteKeys index to the key's corresponding index in the keys array.
   337  	// Any errors or allocations on incompleteKeys operations will be propagated
   338  	// to the correct keys index using this map.
   339  	var (
   340  		incompleteKeys []*datastore.Key
   341  		incompleteMap  map[int]int
   342  	)
   343  
   344  	for i, key := range keys {
   345  		if key.IsIncomplete() {
   346  			if incompleteMap == nil {
   347  				incompleteMap = make(map[int]int)
   348  			}
   349  			incompleteMap[len(incompleteKeys)] = i
   350  			incompleteKeys = append(incompleteKeys, key)
   351  		}
   352  	}
   353  	if len(incompleteKeys) == 0 {
   354  		return keys, nil
   355  	}
   356  
   357  	// We're going to update keys, so clone it.
   358  	keys, origKeys := make([]*datastore.Key, len(keys)), keys
   359  	copy(keys, origKeys)
   360  
   361  	// Intentionally call AllocateIDs without lock.
   362  	outerErr := errors.NewLazyMultiError(len(keys))
   363  	err := t.parentDS.AllocateIDs(incompleteKeys, func(i int, key *datastore.Key, err error) {
   364  		outerIdx := incompleteMap[i]
   365  
   366  		if err != nil {
   367  			outerErr.Assign(outerIdx, err)
   368  		} else {
   369  			keys[outerIdx] = key
   370  		}
   371  	})
   372  	if err != nil {
   373  		return nil, err
   374  	}
   375  	return keys, outerErr.Get()
   376  }
   377  
   378  func (t *txnBufState) putMulti(keys []*datastore.Key, vals []datastore.PropertyMap, cb datastore.NewKeyCB, haveLock bool) error {
   379  	keys, err := t.fixKeys(keys)
   380  	if err != nil {
   381  		for i, e := range err.(errors.MultiError) {
   382  			cb(i, nil, e)
   383  		}
   384  		return nil
   385  	}
   386  
   387  	encKeys, roots := toEncoded(keys)
   388  
   389  	err = func() error {
   390  		if !haveLock {
   391  			t.Lock()
   392  			defer t.Unlock()
   393  		}
   394  
   395  		if err := t.updateRootsLocked(roots); err != nil {
   396  			return err
   397  		}
   398  
   399  		err := t.bufDS.PutMulti(keys, vals, func(i int, k *datastore.Key, err error) {
   400  			impossible(err)
   401  			t.entState.set(encKeys[i], vals[i].EstimateSize())
   402  		})
   403  		impossible(err)
   404  		return nil
   405  	}()
   406  	if err != nil {
   407  		return err
   408  	}
   409  
   410  	for i, k := range keys {
   411  		cb(i, k, nil)
   412  	}
   413  	return nil
   414  }
   415  
   416  func commitToReal(s *txnBufState) error {
   417  	toPut, toPutKeys, toDel := s.effect()
   418  
   419  	return parallel.FanOutIn(func(ch chan<- func() error) {
   420  		if len(toPut) > 0 {
   421  			ch <- func() error {
   422  				mErr := errors.NewLazyMultiError(len(toPut))
   423  				err := s.parentDS.PutMulti(toPutKeys, toPut, func(i int, _ *datastore.Key, err error) {
   424  					mErr.Assign(i, err)
   425  				})
   426  				if err == nil {
   427  					err = mErr.Get()
   428  				}
   429  				return err
   430  			}
   431  		}
   432  		if len(toDel) > 0 {
   433  			ch <- func() error {
   434  				mErr := errors.NewLazyMultiError(len(toDel))
   435  				err := s.parentDS.DeleteMulti(toDel, func(i int, err error) {
   436  					mErr.Assign(i, err)
   437  				})
   438  				if err == nil {
   439  					err = mErr.Get()
   440  				}
   441  				return err
   442  			}
   443  		}
   444  	})
   445  }
   446  
   447  func (t *txnBufState) effect() (toPut []datastore.PropertyMap, toPutKeys, toDel []*datastore.Key) {
   448  	// TODO(riannucci): preallocate return slices
   449  
   450  	// need to pull all items out of the in-memory datastore. Fortunately we have
   451  	// kindless queries, and we disabled all the special entities, so just
   452  	// run a kindless query without any filters and it will return all data
   453  	// currently in bufDS :).
   454  	fq, err := datastore.NewQuery("").Finalize()
   455  	impossible(err)
   456  
   457  	err = t.bufDS.Run(fq, func(key *datastore.Key, data datastore.PropertyMap, _ datastore.CursorCB) error {
   458  		toPutKeys = append(toPutKeys, key)
   459  		toPut = append(toPut, data)
   460  		return nil
   461  	})
   462  	memoryCorruption(err)
   463  
   464  	for keyStr, size := range t.entState.keyToSize {
   465  		if size == 0 {
   466  			k, err := datastore.Deserializer{KeyContext: t.kc}.Key(bytes.NewBufferString(keyStr))
   467  			memoryCorruption(err)
   468  			toDel = append(toDel, k)
   469  		}
   470  	}
   471  
   472  	return
   473  }
   474  
   475  func (t *txnBufState) canApplyLocked(s *txnBufState) error {
   476  	proposedState := t.entState.dup()
   477  
   478  	for k, v := range s.entState.keyToSize {
   479  		proposedState.set(k, v)
   480  	}
   481  	switch {
   482  	case proposedState.numWrites() > t.writeCountBudget:
   483  		// The new net number of writes must be below the parent's write count
   484  		// cutoff.
   485  		fallthrough
   486  
   487  	case proposedState.total > t.sizeBudget:
   488  		// Make sure our new calculated size is within the parent's size budget.
   489  		//
   490  		// We have:
   491  		// - proposedState.total: The "new world" total bytes were this child
   492  		//   transaction committed to the parent.
   493  		// - t.sizeBudget: The maximum number of bytes that this parent can
   494  		//   accommodate.
   495  		return ErrTransactionTooLarge
   496  	}
   497  
   498  	return nil
   499  }
   500  
   501  func (t *txnBufState) commitLocked(s *txnBufState) {
   502  	toPut, toPutKeys, toDel := s.effect()
   503  
   504  	if len(toPut) > 0 {
   505  		impossible(t.putMulti(toPutKeys, toPut,
   506  			func(_ int, _ *datastore.Key, err error) { impossible(err) }, true))
   507  	}
   508  
   509  	if len(toDel) > 0 {
   510  		impossible(t.deleteMulti(toDel,
   511  			func(_ int, err error) { impossible(err) }, true))
   512  	}
   513  }
   514  
   515  // toEncoded returns a list of all of the serialized versions of these keys,
   516  // plus a stringset of all the encoded root keys that `keys` represents.
   517  func toEncoded(keys []*datastore.Key) (full []string, roots stringset.Set) {
   518  	roots = stringset.New(len(keys))
   519  	full = make([]string, len(keys))
   520  	for i, k := range keys {
   521  		roots.Add(string(datastore.Serialize.ToBytes(k.Root())))
   522  		full[i] = string(datastore.Serialize.ToBytes(k))
   523  	}
   524  	return
   525  }