github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/posting/oracle.go (about) 1 /* 2 * Copyright 2017-2018 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package posting 18 19 import ( 20 "context" 21 "math" 22 "sync" 23 "sync/atomic" 24 "time" 25 26 "github.com/dgraph-io/dgraph/protos/pb" 27 "github.com/dgraph-io/dgraph/x" 28 "github.com/golang/glog" 29 ostats "go.opencensus.io/stats" 30 ) 31 32 var o *oracle 33 34 // Oracle returns the global oracle instance. 35 // TODO: Oracle should probably be located in worker package, instead of posting 36 // package now that we don't run inSnapshot anymore. 37 func Oracle() *oracle { 38 return o 39 } 40 41 func init() { 42 o = new(oracle) 43 o.init() 44 } 45 46 // Txn represents a transaction. 47 type Txn struct { 48 StartTs uint64 49 50 // atomic 51 shouldAbort uint32 52 // Fields which can changed after init 53 sync.Mutex 54 55 // Keeps track of conflict keys that should be used to determine if this 56 // transaction conflicts with another. 57 conflicts map[uint64]struct{} 58 59 // Keeps track of last update wall clock. We use this fact later to 60 // determine unhealthy, stale txns. 61 lastUpdate time.Time 62 63 cache *LocalCache // This pointer does not get modified. 64 } 65 66 // NewTxn returns a new Txn instance. 67 func NewTxn(startTs uint64) *Txn { 68 return &Txn{ 69 StartTs: startTs, 70 cache: NewLocalCache(startTs), 71 lastUpdate: time.Now(), 72 } 73 } 74 75 // Get retrieves the posting list for the given list from the local cache. 76 func (txn *Txn) Get(key []byte) (*List, error) { 77 return txn.cache.Get(key) 78 } 79 80 // GetFromDelta retrieves the posting list from delta cache, not from Badger. 81 func (txn *Txn) GetFromDelta(key []byte) (*List, error) { 82 return txn.cache.GetFromDelta(key) 83 } 84 85 // Update calls UpdateDeltasAndDiscardLists on the local cache. 86 func (txn *Txn) Update() { 87 txn.cache.UpdateDeltasAndDiscardLists() 88 } 89 90 // Store is used by tests. 91 func (txn *Txn) Store(pl *List) *List { 92 return txn.cache.SetIfAbsent(string(pl.key), pl) 93 } 94 95 type oracle struct { 96 x.SafeMutex 97 98 // max start ts given out by Zero. Do not use mutex on this, only use atomics. 99 maxAssigned uint64 100 101 // Keeps track of all the startTs we have seen so far, based on the mutations. Then as 102 // transactions are committed or aborted, we delete entries from the startTs map. When taking a 103 // snapshot, we need to know the minimum start ts present in the map, which represents a 104 // mutation which has not yet been committed or aborted. As we iterate over entries, we should 105 // only discard those whose StartTs is below this minimum pending start ts. 106 pendingTxns map[uint64]*Txn 107 108 // Used for waiting logic for transactions with startTs > maxpending so that we don't read an 109 // uncommitted transaction. 110 waiters map[uint64][]chan struct{} 111 } 112 113 func (o *oracle) init() { 114 o.waiters = make(map[uint64][]chan struct{}) 115 o.pendingTxns = make(map[uint64]*Txn) 116 } 117 118 func (o *oracle) RegisterStartTs(ts uint64) *Txn { 119 o.Lock() 120 defer o.Unlock() 121 txn, ok := o.pendingTxns[ts] 122 if ok { 123 txn.lastUpdate = time.Now() 124 } else { 125 txn = NewTxn(ts) 126 o.pendingTxns[ts] = txn 127 } 128 return txn 129 } 130 131 func (o *oracle) CacheAt(ts uint64) *LocalCache { 132 o.RLock() 133 defer o.RUnlock() 134 txn, ok := o.pendingTxns[ts] 135 if !ok { 136 return nil 137 } 138 return txn.cache 139 } 140 141 // MinPendingStartTs returns the min start ts which is currently pending a commit or abort decision. 142 func (o *oracle) MinPendingStartTs() uint64 { 143 o.RLock() 144 defer o.RUnlock() 145 min := uint64(math.MaxUint64) 146 for ts := range o.pendingTxns { 147 if ts < min { 148 min = ts 149 } 150 } 151 return min 152 } 153 154 func (o *oracle) NumPendingTxns() int { 155 o.RLock() 156 defer o.RUnlock() 157 return len(o.pendingTxns) 158 } 159 160 func (o *oracle) TxnOlderThan(dur time.Duration) (res []uint64) { 161 o.RLock() 162 defer o.RUnlock() 163 164 cutoff := time.Now().Add(-dur) 165 for startTs, txn := range o.pendingTxns { 166 if txn.lastUpdate.Before(cutoff) { 167 res = append(res, startTs) 168 } 169 } 170 return res 171 } 172 173 func (o *oracle) addToWaiters(startTs uint64) (chan struct{}, bool) { 174 if startTs <= o.MaxAssigned() { 175 return nil, false 176 } 177 o.Lock() 178 defer o.Unlock() 179 // Check again after acquiring lock, because o.waiters is being processed serially. So, if we 180 // don't check here, then it's possible that we add to waiters here, but MaxAssigned has already 181 // moved past startTs. 182 if startTs <= o.MaxAssigned() { 183 return nil, false 184 } 185 ch := make(chan struct{}) 186 o.waiters[startTs] = append(o.waiters[startTs], ch) 187 return ch, true 188 } 189 190 func (o *oracle) MaxAssigned() uint64 { 191 return atomic.LoadUint64(&o.maxAssigned) 192 } 193 194 func (o *oracle) WaitForTs(ctx context.Context, startTs uint64) error { 195 ch, ok := o.addToWaiters(startTs) 196 if !ok { 197 return nil 198 } 199 select { 200 case <-ch: 201 return nil 202 case <-ctx.Done(): 203 return ctx.Err() 204 } 205 } 206 207 func (o *oracle) ProcessDelta(delta *pb.OracleDelta) { 208 if glog.V(3) { 209 glog.Infof("ProcessDelta: Max Assigned: %d", delta.MaxAssigned) 210 glog.Infof("ProcessDelta: Group checksum: %v", delta.GroupChecksums) 211 for _, txn := range delta.Txns { 212 if txn.CommitTs == 0 { 213 glog.Infof("ProcessDelta Aborted: %d", txn.StartTs) 214 } else { 215 glog.Infof("ProcessDelta Committed: %d -> %d", txn.StartTs, txn.CommitTs) 216 } 217 } 218 } 219 220 o.Lock() 221 defer o.Unlock() 222 for _, txn := range delta.Txns { 223 delete(o.pendingTxns, txn.StartTs) 224 } 225 curMax := o.MaxAssigned() 226 if delta.MaxAssigned < curMax { 227 return 228 } 229 230 // Notify the waiting cattle. 231 for startTs, toNotify := range o.waiters { 232 if startTs > delta.MaxAssigned { 233 continue 234 } 235 for _, ch := range toNotify { 236 close(ch) 237 } 238 delete(o.waiters, startTs) 239 } 240 x.AssertTrue(atomic.CompareAndSwapUint64(&o.maxAssigned, curMax, delta.MaxAssigned)) 241 ostats.Record(context.Background(), 242 x.MaxAssignedTs.M(int64(delta.MaxAssigned))) // Can't access o.MaxAssigned without atomics. 243 } 244 245 func (o *oracle) ResetTxns() { 246 o.Lock() 247 defer o.Unlock() 248 o.pendingTxns = make(map[uint64]*Txn) 249 } 250 251 func (o *oracle) GetTxn(startTs uint64) *Txn { 252 o.RLock() 253 defer o.RUnlock() 254 return o.pendingTxns[startTs] 255 } 256 257 func (txn *Txn) matchesDelta(ok func(key []byte) bool) bool { 258 txn.Lock() 259 defer txn.Unlock() 260 for key := range txn.cache.deltas { 261 if ok([]byte(key)) { 262 return true 263 } 264 } 265 return false 266 } 267 268 // IterateTxns returns a list of start timestamps for currently pending transactions, which match 269 // the provided function. 270 func (o *oracle) IterateTxns(ok func(key []byte) bool) []uint64 { 271 o.RLock() 272 defer o.RUnlock() 273 var timestamps []uint64 274 for startTs, txn := range o.pendingTxns { 275 if txn.matchesDelta(ok) { 276 timestamps = append(timestamps, startTs) 277 } 278 } 279 return timestamps 280 }