github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/posting/mvcc.go (about) 1 /* 2 * Copyright 2017-2018 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package posting 18 19 import ( 20 "bytes" 21 "encoding/hex" 22 "math" 23 "strconv" 24 "sync/atomic" 25 26 "github.com/dgraph-io/badger" 27 "github.com/dgraph-io/dgo/protos/api" 28 "github.com/dgraph-io/dgraph/protos/pb" 29 "github.com/dgraph-io/dgraph/x" 30 "github.com/pkg/errors" 31 ) 32 33 var ( 34 // ErrTsTooOld is returned when a transaction is too old to be applied. 35 ErrTsTooOld = errors.Errorf("Transaction is too old") 36 ) 37 38 // ShouldAbort returns whether the transaction should be aborted. 39 func (txn *Txn) ShouldAbort() bool { 40 if txn == nil { 41 return false 42 } 43 return atomic.LoadUint32(&txn.shouldAbort) > 0 44 } 45 46 func (txn *Txn) addConflictKey(conflictKey uint64) { 47 txn.Lock() 48 defer txn.Unlock() 49 if txn.conflicts == nil { 50 txn.conflicts = make(map[uint64]struct{}) 51 } 52 if conflictKey > 0 { 53 txn.conflicts[conflictKey] = struct{}{} 54 } 55 } 56 57 // FillContext updates the given transaction context with data from this transaction. 58 func (txn *Txn) FillContext(ctx *api.TxnContext, gid uint32) { 59 txn.Lock() 60 ctx.StartTs = txn.StartTs 61 for key := range txn.conflicts { 62 // We don'txn need to send the whole conflict key to Zero. Solving #2338 63 // should be done by sending a list of mutating predicates to Zero, 64 // along with the keys to be used for conflict detection. 65 fps := strconv.FormatUint(key, 36) 66 if !x.HasString(ctx.Keys, fps) { 67 ctx.Keys = append(ctx.Keys, fps) 68 } 69 } 70 txn.Unlock() 71 72 txn.Update() 73 txn.cache.fillPreds(ctx, gid) 74 } 75 76 // CommitToDisk commits a transaction to disk. 77 // This function only stores deltas to the commit timestamps. It does not try to generate a state. 78 // State generation is done via rollups, which happen when a snapshot is created. 79 // Don't call this for schema mutations. Directly commit them. 80 func (txn *Txn) CommitToDisk(writer *TxnWriter, commitTs uint64) error { 81 if commitTs == 0 { 82 return nil 83 } 84 85 cache := txn.cache 86 cache.Lock() 87 defer cache.Unlock() 88 89 var keys []string 90 for key := range cache.deltas { 91 keys = append(keys, key) 92 } 93 94 var idx int 95 for idx < len(keys) { 96 // writer.update can return early from the loop in case we encounter badger.ErrTxnTooBig. On 97 // that error, writer.update would still commit the transaction and return any error. If 98 // nil, we continue to process the remaining keys. 99 err := writer.update(commitTs, func(btxn *badger.Txn) error { 100 for ; idx < len(keys); idx++ { 101 key := keys[idx] 102 data := cache.deltas[key] 103 if len(data) == 0 { 104 continue 105 } 106 if ts := cache.maxVersions[key]; ts >= commitTs { 107 // Skip write because we already have a write at a higher ts. 108 // Logging here can cause a lot of output when doing Raft log replay. So, let's 109 // not output anything here. 110 continue 111 } 112 err := btxn.SetEntry(&badger.Entry{ 113 Key: []byte(key), 114 Value: data, 115 UserMeta: BitDeltaPosting, 116 }) 117 if err != nil { 118 return err 119 } 120 } 121 return nil 122 }) 123 if err != nil { 124 return err 125 } 126 } 127 return nil 128 } 129 130 func unmarshalOrCopy(plist *pb.PostingList, item *badger.Item) error { 131 return item.Value(func(val []byte) error { 132 if len(val) == 0 { 133 // empty pl 134 return nil 135 } 136 return plist.Unmarshal(val) 137 }) 138 } 139 140 // ReadPostingList constructs the posting list from the disk using the passed iterator. 141 // Use forward iterator with allversions enabled in iter options. 142 // key would now be owned by the posting list. So, ensure that it isn't reused elsewhere. 143 func ReadPostingList(key []byte, it *badger.Iterator) (*List, error) { 144 l := new(List) 145 l.key = key 146 l.mutationMap = make(map[uint64]*pb.PostingList) 147 l.plist = new(pb.PostingList) 148 149 // Iterates from highest Ts to lowest Ts 150 for it.Valid() { 151 item := it.Item() 152 if !bytes.Equal(item.Key(), l.key) { 153 break 154 } 155 l.maxTs = x.Max(l.maxTs, item.Version()) 156 if item.IsDeletedOrExpired() { 157 // Don't consider any more versions. 158 break 159 } 160 161 switch item.UserMeta() { 162 case BitEmptyPosting: 163 l.minTs = item.Version() 164 return l, nil 165 case BitCompletePosting: 166 if err := unmarshalOrCopy(l.plist, item); err != nil { 167 return nil, err 168 } 169 l.minTs = item.Version() 170 // No need to do Next here. The outer loop can take care of skipping 171 // more versions of the same key. 172 return l, nil 173 case BitDeltaPosting: 174 err := item.Value(func(val []byte) error { 175 pl := &pb.PostingList{} 176 x.Check(pl.Unmarshal(val)) 177 pl.CommitTs = item.Version() 178 for _, mpost := range pl.Postings { 179 // commitTs, startTs are meant to be only in memory, not 180 // stored on disk. 181 mpost.CommitTs = item.Version() 182 } 183 l.mutationMap[pl.CommitTs] = pl 184 return nil 185 }) 186 if err != nil { 187 return nil, err 188 } 189 case BitSchemaPosting: 190 return nil, errors.Errorf( 191 "Trying to read schema in ReadPostingList for key: %s", hex.Dump(key)) 192 default: 193 return nil, errors.Errorf( 194 "Unexpected meta: %d for key: %s", item.UserMeta(), hex.Dump(key)) 195 } 196 if item.DiscardEarlierVersions() { 197 break 198 } 199 it.Next() 200 } 201 return l, nil 202 } 203 204 // TODO: We should only create a posting list with a specific readTs. 205 func getNew(key []byte, pstore *badger.DB) (*List, error) { 206 txn := pstore.NewTransactionAt(math.MaxUint64, false) 207 defer txn.Discard() 208 209 // When we do rollups, an older version would go to the top of the LSM tree, which can cause 210 // issues during txn.Get. Therefore, always iterate. 211 iterOpts := badger.DefaultIteratorOptions 212 iterOpts.AllVersions = true 213 iterOpts.PrefetchValues = false 214 itr := txn.NewKeyIterator(key, iterOpts) 215 defer itr.Close() 216 itr.Seek(key) 217 return ReadPostingList(key, itr) 218 }