github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/posting/lists.go (about)

     1  /*
     2   * Copyright 2015-2018 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package posting
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"io/ioutil"
    23  	"os"
    24  	"os/exec"
    25  	"runtime"
    26  	"strconv"
    27  	"strings"
    28  	"sync"
    29  	"time"
    30  
    31  	ostats "go.opencensus.io/stats"
    32  
    33  	"github.com/dgraph-io/badger"
    34  	"github.com/dgraph-io/badger/y"
    35  	"github.com/dgraph-io/dgo/protos/api"
    36  	"github.com/golang/glog"
    37  
    38  	"github.com/dgraph-io/dgraph/protos/pb"
    39  	"github.com/dgraph-io/dgraph/x"
    40  )
    41  
var (
	// emptyPostingList holds the marshaled bytes of an empty pb.PostingList. It is populated
	// by the function that init registers through x.AddInit.
	emptyPostingList []byte // Used for indexing.
)
    45  
const (
	// mb is 1<<20, i.e. the number of bytes in one mebibyte.
	mb = 1 << 20
)
    49  
    50  // syncMarks stores the watermark for synced RAFT proposals. Each RAFT proposal consists
    51  // of many individual mutations, which could be applied to many different posting lists.
    52  // Thus, each PL when being mutated would send an undone Mark, and each list would
    53  // accumulate all such pending marks. When the PL is synced to BadgerDB, it would
    54  // mark all the pending ones as done.
    55  // This ideally belongs to RAFT node struct (where committed watermark is being tracked),
    56  // but because the logic of mutations is
    57  // present here and to avoid a circular dependency, we've placed it here.
    58  // Note that there's one watermark for each RAFT node/group.
    59  // This watermark would be used for taking snapshots, to ensure that all the data and
    60  // index mutations have been syned to BadgerDB, before a snapshot is taken, and previous
    61  // RAFT entries discarded.
    62  func init() {
    63  	x.AddInit(func() {
    64  		pl := pb.PostingList{}
    65  		var err error
    66  		emptyPostingList, err = pl.Marshal()
    67  		x.Check(err)
    68  	})
    69  }
    70  
    71  func getMemUsage() int {
    72  	if runtime.GOOS != "linux" {
    73  		pid := os.Getpid()
    74  		cmd := fmt.Sprintf("ps -ao rss,pid | grep %v", pid)
    75  		c1, err := exec.Command("bash", "-c", cmd).Output()
    76  		if err != nil {
    77  			// In case of error running the command, resort to go way
    78  			var ms runtime.MemStats
    79  			runtime.ReadMemStats(&ms)
    80  			megs := ms.Alloc
    81  			return int(megs)
    82  		}
    83  
    84  		rss := strings.Split(string(c1), " ")[0]
    85  		kbs, err := strconv.Atoi(rss)
    86  		if err != nil {
    87  			return 0
    88  		}
    89  
    90  		megs := kbs << 10
    91  		return megs
    92  	}
    93  
    94  	contents, err := ioutil.ReadFile("/proc/self/stat")
    95  	if err != nil {
    96  		glog.Errorf("Can't read the proc file. Err: %v\n", err)
    97  		return 0
    98  	}
    99  
   100  	cont := strings.Split(string(contents), " ")
   101  	// 24th entry of the file is the RSS which denotes the number of pages
   102  	// used by the process.
   103  	if len(cont) < 24 {
   104  		glog.Errorln("Error in RSS from stat")
   105  		return 0
   106  	}
   107  
   108  	rss, err := strconv.Atoi(cont[23])
   109  	if err != nil {
   110  		glog.Errorln(err)
   111  		return 0
   112  	}
   113  
   114  	return rss * os.Getpagesize()
   115  }
   116  
   117  func updateMemoryMetrics(lc *y.Closer) {
   118  	defer lc.Done()
   119  	ticker := time.NewTicker(time.Minute)
   120  	defer ticker.Stop()
   121  
   122  	for {
   123  		select {
   124  		case <-lc.HasBeenClosed():
   125  			return
   126  		case <-ticker.C:
   127  			var ms runtime.MemStats
   128  			runtime.ReadMemStats(&ms)
   129  
   130  			inUse := ms.HeapInuse + ms.StackInuse
   131  			// From runtime/mstats.go:
   132  			// HeapIdle minus HeapReleased estimates the amount of memory
   133  			// that could be returned to the OS, but is being retained by
   134  			// the runtime so it can grow the heap without requesting more
   135  			// memory from the OS. If this difference is significantly
   136  			// larger than the heap size, it indicates there was a recent
   137  			// transient spike in live heap size.
   138  			idle := ms.HeapIdle - ms.HeapReleased
   139  
   140  			ostats.Record(context.Background(),
   141  				x.MemoryInUse.M(int64(inUse)),
   142  				x.MemoryIdle.M(int64(idle)),
   143  				x.MemoryProc.M(int64(getMemUsage())))
   144  		}
   145  	}
   146  }
   147  
var (
	// pstore is the Badger database handle set by Init; it is passed to getNew whenever a
	// posting list is loaded.
	pstore *badger.DB
	// closer is created by Init, handed to the updateMemoryMetrics goroutine, and signaled
	// by Cleanup.
	closer *y.Closer
)
   152  
   153  // Init initializes the posting lists package, the in memory and dirty list hash.
   154  func Init(ps *badger.DB) {
   155  	pstore = ps
   156  	closer = y.NewCloser(1)
   157  	go updateMemoryMetrics(closer)
   158  }
   159  
// Cleanup waits until the closer has finished processing.
// It signals the package-level closer created by Init and waits on it.
// NOTE(review): closer is only assigned in Init, so this presumably must not be called
// before Init — confirm against callers.
func Cleanup() {
	closer.SignalAndWait()
}
   164  
// GetNoStore returns the list stored in the key or creates a new one if it doesn't exist.
// It does not store the list in any cache.
// The lookup is delegated to getNew using the package-level pstore.
func GetNoStore(key []byte) (rlist *List, err error) {
	return getNew(key, pstore)
}
   170  
// LocalCache stores a cache of posting lists and deltas.
// This doesn't sync, so call this only when you don't care about dirty posting lists in
// memory(for example before populating snapshot) or after calling syncAllMarks
type LocalCache struct {
	// The embedded RWMutex is taken by the methods of LocalCache around accesses to the
	// maps below.
	sync.RWMutex

	// startTs is the timestamp passed to setMutation/getMutation when deltas are applied to
	// or read from the cached lists.
	startTs uint64

	// The keys for these maps are string representations of the Badger key for the posting
	// list.
	// deltas keep track of the updates made by txn. These must be kept around until written to disk
	// during commit.
	deltas map[string][]byte

	// max committed timestamp of the read posting lists.
	// Entries are written by UpdateDeltasAndDiscardLists from each list's maxVersion().
	maxVersions map[string]uint64

	// plists are posting lists in memory. They can be discarded to reclaim space.
	plists map[string]*List
}
   190  
   191  // NewLocalCache returns a new LocalCache instance.
   192  func NewLocalCache(startTs uint64) *LocalCache {
   193  	return &LocalCache{
   194  		startTs:     startTs,
   195  		deltas:      make(map[string][]byte),
   196  		plists:      make(map[string]*List),
   197  		maxVersions: make(map[string]uint64),
   198  	}
   199  }
   200  
   201  func (lc *LocalCache) getNoStore(key string) *List {
   202  	lc.RLock()
   203  	defer lc.RUnlock()
   204  	if l, ok := lc.plists[key]; ok {
   205  		return l
   206  	}
   207  	return nil
   208  }
   209  
   210  // SetIfAbsent adds the list for the specified key to the cache. If a list for the same
   211  // key already exists, the cache will not be modified and the existing list
   212  // will be returned instead. This behavior is meant to prevent the goroutines
   213  // using the cache from ending up with an orphaned version of a list.
   214  func (lc *LocalCache) SetIfAbsent(key string, updated *List) *List {
   215  	lc.Lock()
   216  	defer lc.Unlock()
   217  	if pl, ok := lc.plists[key]; ok {
   218  		return pl
   219  	}
   220  	lc.plists[key] = updated
   221  	return updated
   222  }
   223  
   224  func (lc *LocalCache) getInternal(key []byte, readFromDisk bool) (*List, error) {
   225  	if lc == nil {
   226  		return getNew(key, pstore)
   227  	}
   228  	skey := string(key)
   229  	if pl := lc.getNoStore(skey); pl != nil {
   230  		return pl, nil
   231  	}
   232  
   233  	var pl *List
   234  	if readFromDisk {
   235  		var err error
   236  		pl, err = getNew(key, pstore)
   237  		if err != nil {
   238  			return nil, err
   239  		}
   240  	} else {
   241  		pl = &List{
   242  			key:         key,
   243  			mutationMap: make(map[uint64]*pb.PostingList),
   244  			plist:       new(pb.PostingList),
   245  		}
   246  	}
   247  
   248  	// If we just brought this posting list into memory and we already have a delta for it, let's
   249  	// apply it before returning the list.
   250  	lc.RLock()
   251  	if delta, ok := lc.deltas[skey]; ok && len(delta) > 0 {
   252  		pl.setMutation(lc.startTs, delta)
   253  	}
   254  	lc.RUnlock()
   255  	return lc.SetIfAbsent(skey, pl), nil
   256  }
   257  
// Get retrieves the cached version of the list associated with the given key.
// It calls getInternal with readFromDisk set to true, so on a cache miss the list is
// loaded through getNew.
func (lc *LocalCache) Get(key []byte) (*List, error) {
	return lc.getInternal(key, true)
}
   262  
// GetFromDelta gets the cached version of the list without reading from disk
// and only applies the existing deltas. This is used in situations where the
// posting list will only be modified and not read (e.g adding index mutations).
// It calls getInternal with readFromDisk set to false.
func (lc *LocalCache) GetFromDelta(key []byte) (*List, error) {
	return lc.getInternal(key, false)
}
   269  
   270  // UpdateDeltasAndDiscardLists updates the delta cache before removing the stored posting lists.
   271  func (lc *LocalCache) UpdateDeltasAndDiscardLists() {
   272  	lc.Lock()
   273  	defer lc.Unlock()
   274  	if len(lc.plists) == 0 {
   275  		return
   276  	}
   277  
   278  	for key, pl := range lc.plists {
   279  		data := pl.getMutation(lc.startTs)
   280  		if len(data) > 0 {
   281  			lc.deltas[key] = data
   282  		}
   283  		lc.maxVersions[key] = pl.maxVersion()
   284  		// We can't run pl.release() here because LocalCache is still being used by other callers
   285  		// for the same transaction, who might be holding references to posting lists.
   286  		// TODO: Find another way to reuse postings via postingPool.
   287  	}
   288  	lc.plists = make(map[string]*List)
   289  }
   290  
   291  func (lc *LocalCache) fillPreds(ctx *api.TxnContext, gid uint32) {
   292  	lc.RLock()
   293  	defer lc.RUnlock()
   294  	for key := range lc.deltas {
   295  		pk, err := x.Parse([]byte(key))
   296  		x.Check(err)
   297  		if len(pk.Attr) == 0 {
   298  			continue
   299  		}
   300  		// Also send the group id that the predicate was being served by. This is useful when
   301  		// checking if Zero should allow a commit during a predicate move.
   302  		predKey := fmt.Sprintf("%d-%s", gid, pk.Attr)
   303  		if !x.HasString(ctx.Preds, predKey) {
   304  			ctx.Preds = append(ctx.Preds, predKey)
   305  		}
   306  	}
   307  }