github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/posting/oracle.go (about)

     1  /*
     2   * Copyright 2017-2018 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package posting
    18  
    19  import (
    20  	"context"
    21  	"math"
    22  	"sync"
    23  	"sync/atomic"
    24  	"time"
    25  
    26  	"github.com/dgraph-io/dgraph/protos/pb"
    27  	"github.com/dgraph-io/dgraph/x"
    28  	"github.com/golang/glog"
    29  	ostats "go.opencensus.io/stats"
    30  )
    31  
// o is the package-level oracle instance. It is allocated and initialized in
// this package's init function and handed out by Oracle.
var o *oracle
    33  
    34  // Oracle returns the global oracle instance.
    35  // TODO: Oracle should probably be located in worker package, instead of posting
    36  // package now that we don't run inSnapshot anymore.
    37  func Oracle() *oracle {
    38  	return o
    39  }
    40  
    41  func init() {
    42  	o = new(oracle)
    43  	o.init()
    44  }
    45  
// Txn represents a transaction. It bundles the transaction's start timestamp
// with its per-transaction local cache and some bookkeeping that the oracle
// uses to track pending transactions.
type Txn struct {
	// StartTs is the start timestamp the transaction was created with (see NewTxn).
	StartTs uint64

	// shouldAbort is meant to be accessed atomically. Its readers and writers
	// are not part of this file.
	shouldAbort uint32
	// Fields which can be changed after init. The embedded mutex is what
	// matchesDelta holds while it walks the cache's deltas.
	sync.Mutex

	// Keeps track of conflict keys that should be used to determine if this
	// transaction conflicts with another.
	conflicts map[uint64]struct{}

	// Keeps track of last update wall clock. We use this fact later to
	// determine unhealthy, stale txns. It is set in NewTxn, refreshed by
	// oracle.RegisterStartTs and inspected by oracle.TxnOlderThan.
	lastUpdate time.Time

	cache *LocalCache // This pointer does not get modified.
}
    65  
    66  // NewTxn returns a new Txn instance.
    67  func NewTxn(startTs uint64) *Txn {
    68  	return &Txn{
    69  		StartTs:    startTs,
    70  		cache:      NewLocalCache(startTs),
    71  		lastUpdate: time.Now(),
    72  	}
    73  }
    74  
    75  // Get retrieves the posting list for the given list from the local cache.
    76  func (txn *Txn) Get(key []byte) (*List, error) {
    77  	return txn.cache.Get(key)
    78  }
    79  
    80  // GetFromDelta retrieves the posting list from delta cache, not from Badger.
    81  func (txn *Txn) GetFromDelta(key []byte) (*List, error) {
    82  	return txn.cache.GetFromDelta(key)
    83  }
    84  
    85  // Update calls UpdateDeltasAndDiscardLists on the local cache.
    86  func (txn *Txn) Update() {
    87  	txn.cache.UpdateDeltasAndDiscardLists()
    88  }
    89  
    90  // Store is used by tests.
    91  func (txn *Txn) Store(pl *List) *List {
    92  	return txn.cache.SetIfAbsent(string(pl.key), pl)
    93  }
    94  
    95  type oracle struct {
    96  	x.SafeMutex
    97  
    98  	// max start ts given out by Zero. Do not use mutex on this, only use atomics.
    99  	maxAssigned uint64
   100  
   101  	// Keeps track of all the startTs we have seen so far, based on the mutations. Then as
   102  	// transactions are committed or aborted, we delete entries from the startTs map. When taking a
   103  	// snapshot, we need to know the minimum start ts present in the map, which represents a
   104  	// mutation which has not yet been committed or aborted.  As we iterate over entries, we should
   105  	// only discard those whose StartTs is below this minimum pending start ts.
   106  	pendingTxns map[uint64]*Txn
   107  
   108  	// Used for waiting logic for transactions with startTs > maxpending so that we don't read an
   109  	// uncommitted transaction.
   110  	waiters map[uint64][]chan struct{}
   111  }
   112  
   113  func (o *oracle) init() {
   114  	o.waiters = make(map[uint64][]chan struct{})
   115  	o.pendingTxns = make(map[uint64]*Txn)
   116  }
   117  
   118  func (o *oracle) RegisterStartTs(ts uint64) *Txn {
   119  	o.Lock()
   120  	defer o.Unlock()
   121  	txn, ok := o.pendingTxns[ts]
   122  	if ok {
   123  		txn.lastUpdate = time.Now()
   124  	} else {
   125  		txn = NewTxn(ts)
   126  		o.pendingTxns[ts] = txn
   127  	}
   128  	return txn
   129  }
   130  
   131  func (o *oracle) CacheAt(ts uint64) *LocalCache {
   132  	o.RLock()
   133  	defer o.RUnlock()
   134  	txn, ok := o.pendingTxns[ts]
   135  	if !ok {
   136  		return nil
   137  	}
   138  	return txn.cache
   139  }
   140  
   141  // MinPendingStartTs returns the min start ts which is currently pending a commit or abort decision.
   142  func (o *oracle) MinPendingStartTs() uint64 {
   143  	o.RLock()
   144  	defer o.RUnlock()
   145  	min := uint64(math.MaxUint64)
   146  	for ts := range o.pendingTxns {
   147  		if ts < min {
   148  			min = ts
   149  		}
   150  	}
   151  	return min
   152  }
   153  
   154  func (o *oracle) NumPendingTxns() int {
   155  	o.RLock()
   156  	defer o.RUnlock()
   157  	return len(o.pendingTxns)
   158  }
   159  
   160  func (o *oracle) TxnOlderThan(dur time.Duration) (res []uint64) {
   161  	o.RLock()
   162  	defer o.RUnlock()
   163  
   164  	cutoff := time.Now().Add(-dur)
   165  	for startTs, txn := range o.pendingTxns {
   166  		if txn.lastUpdate.Before(cutoff) {
   167  			res = append(res, startTs)
   168  		}
   169  	}
   170  	return res
   171  }
   172  
   173  func (o *oracle) addToWaiters(startTs uint64) (chan struct{}, bool) {
   174  	if startTs <= o.MaxAssigned() {
   175  		return nil, false
   176  	}
   177  	o.Lock()
   178  	defer o.Unlock()
   179  	// Check again after acquiring lock, because o.waiters is being processed serially. So, if we
   180  	// don't check here, then it's possible that we add to waiters here, but MaxAssigned has already
   181  	// moved past startTs.
   182  	if startTs <= o.MaxAssigned() {
   183  		return nil, false
   184  	}
   185  	ch := make(chan struct{})
   186  	o.waiters[startTs] = append(o.waiters[startTs], ch)
   187  	return ch, true
   188  }
   189  
   190  func (o *oracle) MaxAssigned() uint64 {
   191  	return atomic.LoadUint64(&o.maxAssigned)
   192  }
   193  
   194  func (o *oracle) WaitForTs(ctx context.Context, startTs uint64) error {
   195  	ch, ok := o.addToWaiters(startTs)
   196  	if !ok {
   197  		return nil
   198  	}
   199  	select {
   200  	case <-ch:
   201  		return nil
   202  	case <-ctx.Done():
   203  		return ctx.Err()
   204  	}
   205  }
   206  
   207  func (o *oracle) ProcessDelta(delta *pb.OracleDelta) {
   208  	if glog.V(3) {
   209  		glog.Infof("ProcessDelta: Max Assigned: %d", delta.MaxAssigned)
   210  		glog.Infof("ProcessDelta: Group checksum: %v", delta.GroupChecksums)
   211  		for _, txn := range delta.Txns {
   212  			if txn.CommitTs == 0 {
   213  				glog.Infof("ProcessDelta Aborted: %d", txn.StartTs)
   214  			} else {
   215  				glog.Infof("ProcessDelta Committed: %d -> %d", txn.StartTs, txn.CommitTs)
   216  			}
   217  		}
   218  	}
   219  
   220  	o.Lock()
   221  	defer o.Unlock()
   222  	for _, txn := range delta.Txns {
   223  		delete(o.pendingTxns, txn.StartTs)
   224  	}
   225  	curMax := o.MaxAssigned()
   226  	if delta.MaxAssigned < curMax {
   227  		return
   228  	}
   229  
   230  	// Notify the waiting cattle.
   231  	for startTs, toNotify := range o.waiters {
   232  		if startTs > delta.MaxAssigned {
   233  			continue
   234  		}
   235  		for _, ch := range toNotify {
   236  			close(ch)
   237  		}
   238  		delete(o.waiters, startTs)
   239  	}
   240  	x.AssertTrue(atomic.CompareAndSwapUint64(&o.maxAssigned, curMax, delta.MaxAssigned))
   241  	ostats.Record(context.Background(),
   242  		x.MaxAssignedTs.M(int64(delta.MaxAssigned))) // Can't access o.MaxAssigned without atomics.
   243  }
   244  
   245  func (o *oracle) ResetTxns() {
   246  	o.Lock()
   247  	defer o.Unlock()
   248  	o.pendingTxns = make(map[uint64]*Txn)
   249  }
   250  
   251  func (o *oracle) GetTxn(startTs uint64) *Txn {
   252  	o.RLock()
   253  	defer o.RUnlock()
   254  	return o.pendingTxns[startTs]
   255  }
   256  
   257  func (txn *Txn) matchesDelta(ok func(key []byte) bool) bool {
   258  	txn.Lock()
   259  	defer txn.Unlock()
   260  	for key := range txn.cache.deltas {
   261  		if ok([]byte(key)) {
   262  			return true
   263  		}
   264  	}
   265  	return false
   266  }
   267  
   268  // IterateTxns returns a list of start timestamps for currently pending transactions, which match
   269  // the provided function.
   270  func (o *oracle) IterateTxns(ok func(key []byte) bool) []uint64 {
   271  	o.RLock()
   272  	defer o.RUnlock()
   273  	var timestamps []uint64
   274  	for startTs, txn := range o.pendingTxns {
   275  		if txn.matchesDelta(ok) {
   276  			timestamps = append(timestamps, startTs)
   277  		}
   278  	}
   279  	return timestamps
   280  }