github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/posting/lists.go

/*
 * Copyright 2015-2018 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package posting

import (
	"context"
	"fmt"
	"io/ioutil"
	"os"
	"os/exec"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"time"

	ostats "go.opencensus.io/stats"

	"github.com/dgraph-io/badger"
	"github.com/dgraph-io/badger/y"
	"github.com/dgraph-io/dgo/protos/api"
	"github.com/golang/glog"

	"github.com/dgraph-io/dgraph/protos/pb"
	"github.com/dgraph-io/dgraph/x"
)

var (
	emptyPostingList []byte // Used for indexing.
)

const (
	mb = 1 << 20
)

// syncMarks stores the watermark for synced RAFT proposals. Each RAFT proposal
// consists of many individual mutations, which could be applied to many different
// posting lists. Thus, each PL being mutated would send an undone Mark, and each
// list would accumulate all such pending marks. When the PL is synced to BadgerDB,
// it would mark all the pending ones as done.
// This ideally belongs to the RAFT node struct (where the committed watermark is
// tracked), but the mutation logic lives here, so to avoid a circular dependency
// we keep it here as well.
// Note that there's one watermark for each RAFT node/group.
// This watermark is used for taking snapshots, to ensure that all the data and
// index mutations have been synced to BadgerDB before a snapshot is taken and
// previous RAFT entries are discarded.
func init() {
	x.AddInit(func() {
		pl := pb.PostingList{}
		var err error
		emptyPostingList, err = pl.Marshal()
		x.Check(err)
	})
}

func getMemUsage() int {
	if runtime.GOOS != "linux" {
		pid := os.Getpid()
		cmd := fmt.Sprintf("ps -ao rss,pid | grep %v", pid)
		c1, err := exec.Command("bash", "-c", cmd).Output()
		if err != nil {
			// If running the command fails, fall back to Go's runtime stats.
			var ms runtime.MemStats
			runtime.ReadMemStats(&ms)
			return int(ms.Alloc)
		}

		rss := strings.Split(string(c1), " ")[0]
		kbs, err := strconv.Atoi(rss)
		if err != nil {
			return 0
		}

		// ps reports RSS in kilobytes; shift to convert to bytes.
		return kbs << 10
	}

	contents, err := ioutil.ReadFile("/proc/self/stat")
	if err != nil {
		glog.Errorf("Can't read the proc file. Err: %v\n", err)
		return 0
	}

	cont := strings.Split(string(contents), " ")
	// The 24th entry of the file is the RSS, which denotes the number of pages
	// used by the process.
	if len(cont) < 24 {
		glog.Errorln("Error in RSS from stat")
		return 0
	}

	rss, err := strconv.Atoi(cont[23])
	if err != nil {
		glog.Errorln(err)
		return 0
	}

	return rss * os.Getpagesize()
}
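// For reference, the arithmetic in the /proc/self/stat path above (an
// illustrative note, not part of the upstream file): with the common 4 KiB
// page size, an RSS of 25,600 pages works out to
// 25600 * 4096 = 104,857,600 bytes, i.e. 100 MiB.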
func updateMemoryMetrics(lc *y.Closer) {
	defer lc.Done()
	ticker := time.NewTicker(time.Minute)
	defer ticker.Stop()

	for {
		select {
		case <-lc.HasBeenClosed():
			return
		case <-ticker.C:
			var ms runtime.MemStats
			runtime.ReadMemStats(&ms)

			inUse := ms.HeapInuse + ms.StackInuse
			// From runtime/mstats.go:
			// HeapIdle minus HeapReleased estimates the amount of memory
			// that could be returned to the OS, but is being retained by
			// the runtime so it can grow the heap without requesting more
			// memory from the OS. If this difference is significantly
			// larger than the heap size, it indicates there was a recent
			// transient spike in live heap size.
			idle := ms.HeapIdle - ms.HeapReleased

			ostats.Record(context.Background(),
				x.MemoryInUse.M(int64(inUse)),
				x.MemoryIdle.M(int64(idle)),
				x.MemoryProc.M(int64(getMemUsage())))
		}
	}
}

var (
	pstore *badger.DB
	closer *y.Closer
)

// Init initializes the posting lists package. It sets the package-level Badger
// store and starts the goroutine that periodically records memory metrics.
func Init(ps *badger.DB) {
	pstore = ps
	closer = y.NewCloser(1)
	go updateMemoryMetrics(closer)
}

// Cleanup signals the closer and waits until it has finished processing.
func Cleanup() {
	closer.SignalAndWait()
}

// GetNoStore returns the list stored at the key, or creates a new one if it
// doesn't exist. It does not store the list in any cache.
func GetNoStore(key []byte) (rlist *List, err error) {
	return getNew(key, pstore)
}
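// exampleLifecycle is a minimal illustrative sketch (not part of the upstream
// file, and not used by the package) of the expected lifecycle, assuming the
// caller has already opened a Badger DB: Init wires up the store and starts
// the metrics goroutine, and Cleanup stops it on shutdown.
func exampleLifecycle(db *badger.DB) {
	Init(db)        // starts updateMemoryMetrics in the background
	defer Cleanup() // signals the closer and waits for the goroutine to exit

	// Reads that bypass any cache go through GetNoStore. The key below is
	// hypothetical.
	if _, err := GetNoStore([]byte("hypothetical-key")); err != nil {
		glog.Errorf("read failed: %v", err)
	}
}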
// LocalCache stores a cache of posting lists and deltas.
// This doesn't sync, so use it only when you don't care about dirty posting
// lists in memory (for example, before populating a snapshot) or after calling
// syncAllMarks.
type LocalCache struct {
	sync.RWMutex

	startTs uint64

	// The key for each of these maps is a string representation of the Badger
	// key for the posting list.

	// deltas keeps track of the updates made by the txn. These must be kept
	// around until written to disk during commit.
	deltas map[string][]byte

	// maxVersions holds the max committed timestamp of each posting list read.
	maxVersions map[string]uint64

	// plists are posting lists in memory. They can be discarded to reclaim space.
	plists map[string]*List
}

// NewLocalCache returns a new LocalCache instance.
func NewLocalCache(startTs uint64) *LocalCache {
	return &LocalCache{
		startTs:     startTs,
		deltas:      make(map[string][]byte),
		plists:      make(map[string]*List),
		maxVersions: make(map[string]uint64),
	}
}

func (lc *LocalCache) getNoStore(key string) *List {
	lc.RLock()
	defer lc.RUnlock()
	if l, ok := lc.plists[key]; ok {
		return l
	}
	return nil
}

// SetIfAbsent adds the list for the specified key to the cache. If a list for
// the same key already exists, the cache is not modified and the existing list
// is returned instead. This behavior prevents the goroutines using the cache
// from ending up with an orphaned version of a list.
func (lc *LocalCache) SetIfAbsent(key string, updated *List) *List {
	lc.Lock()
	defer lc.Unlock()
	if pl, ok := lc.plists[key]; ok {
		return pl
	}
	lc.plists[key] = updated
	return updated
}

func (lc *LocalCache) getInternal(key []byte, readFromDisk bool) (*List, error) {
	if lc == nil {
		return getNew(key, pstore)
	}
	skey := string(key)
	if pl := lc.getNoStore(skey); pl != nil {
		return pl, nil
	}

	var pl *List
	if readFromDisk {
		var err error
		pl, err = getNew(key, pstore)
		if err != nil {
			return nil, err
		}
	} else {
		pl = &List{
			key:         key,
			mutationMap: make(map[uint64]*pb.PostingList),
			plist:       new(pb.PostingList),
		}
	}

	// If we just brought this posting list into memory and we already have a
	// delta for it, apply the delta before returning the list.
	lc.RLock()
	if delta, ok := lc.deltas[skey]; ok && len(delta) > 0 {
		pl.setMutation(lc.startTs, delta)
	}
	lc.RUnlock()
	return lc.SetIfAbsent(skey, pl), nil
}

// Get retrieves the cached version of the list associated with the given key.
func (lc *LocalCache) Get(key []byte) (*List, error) {
	return lc.getInternal(key, true)
}

// GetFromDelta gets the cached version of the list without reading from disk
// and only applies the existing deltas. This is used in situations where the
// posting list will only be modified and not read (e.g., adding index mutations).
func (lc *LocalCache) GetFromDelta(key []byte) (*List, error) {
	return lc.getInternal(key, false)
}

// UpdateDeltasAndDiscardLists updates the delta cache before removing the
// stored posting lists.
func (lc *LocalCache) UpdateDeltasAndDiscardLists() {
	lc.Lock()
	defer lc.Unlock()
	if len(lc.plists) == 0 {
		return
	}

	for key, pl := range lc.plists {
		data := pl.getMutation(lc.startTs)
		if len(data) > 0 {
			lc.deltas[key] = data
		}
		lc.maxVersions[key] = pl.maxVersion()
		// We can't run pl.release() here because this LocalCache is still in
		// use by other callers for the same transaction, who might be holding
		// references to posting lists.
		// TODO: Find another way to reuse postings via postingPool.
	}
	lc.plists = make(map[string]*List)
}

func (lc *LocalCache) fillPreds(ctx *api.TxnContext, gid uint32) {
	lc.RLock()
	defer lc.RUnlock()
	for key := range lc.deltas {
		pk, err := x.Parse([]byte(key))
		x.Check(err)
		if len(pk.Attr) == 0 {
			continue
		}
		// Also send the group id that the predicate was being served by. This
		// is useful when checking whether Zero should allow a commit during a
		// predicate move.
		predKey := fmt.Sprintf("%d-%s", gid, pk.Attr)
		if !x.HasString(ctx.Preds, predKey) {
			ctx.Preds = append(ctx.Preds, predKey)
		}
	}
}
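// exampleTxnCacheFlow is a minimal illustrative sketch (not part of the
// upstream file, and not used by the package) of how a transaction-scoped
// LocalCache is typically driven; startTs and key are hypothetical inputs.
func exampleTxnCacheFlow(startTs uint64, key []byte) error {
	lc := NewLocalCache(startTs)

	// Get reads through the cache, hitting Badger on a miss and applying any
	// delta this transaction has already recorded for the key.
	pl, err := lc.Get(key)
	if err != nil {
		return err
	}
	_ = pl // mutations would be applied to the list here

	// Before commit, fold the in-memory lists back into deltas and drop the
	// lists so their memory can be reclaimed.
	lc.UpdateDeltasAndDiscardLists()
	return nil
}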