github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/state/presence/pruner.go (about)

     1  // Copyright 2017 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package presence
     5  
     6  import (
     7  	"time"
     8  
     9  	"github.com/juju/errors"
    10  	"gopkg.in/mgo.v2"
    11  	"gopkg.in/mgo.v2/bson"
    12  )
    13  
    14  // beingRemover tracks what records we've decided we wanted to remove.
    15  type beingRemover struct {
    16  	queue []string
    17  }
    18  
    19  // Pruner tracks the state of removing unworthy beings from the
    20  // presence.beings and presence.pings collections. Being sequences are unworthy
    21  // once their sequence has been superseded, and pings older than 2 slots are
    22  // no longer referenced.
    23  type Pruner struct {
    24  	modelUUID    string
    25  	beingsC      *mgo.Collection
    26  	pingsC       *mgo.Collection
    27  	toRemove     []string
    28  	maxQueue     int
    29  	removedCount uint64
    30  	delta        time.Duration
    31  }
    32  
    33  // iterKeys is returns an iterator of Keys from this modelUUID and which Sequences
    34  // are used to represent them.
    35  // It only returns sequences that have more than one sequence associated with the same
    36  // being (as beings with a single sequence will never be pruned).
    37  func (p *Pruner) iterKeys() *mgo.Iter {
    38  	thisModelRegex := bson.M{"_id": bson.M{"$regex": bson.RegEx{"^" + p.modelUUID, ""}}}
    39  	pipe := p.beingsC.Pipe([]bson.M{
    40  		// Grab all sequences for this model
    41  		{"$match": thisModelRegex},
    42  		// We don't need the _id
    43  		{"$project": bson.M{"_id": 0, "seq": 1, "key": 1}},
    44  		// Group all the sequences by their key.
    45  		{"$group": bson.M{
    46  			"_id":  "$key",
    47  			"seqs": bson.M{"$push": "$seq"},
    48  		}},
    49  		// Filter out any keys that have only a single sequence
    50  		// representing them
    51  		// Note: indexing is from 0, you can set this to 2 if you wanted
    52  		// to only bother pruning sequences that have >2 entries.
    53  		// This mostly helps the 'nothing to do' case, dropping the time
    54  		// to realize there are no sequences to be removed from 36ms,
    55  		// down to 15ms with 3500 keys.
    56  		{"$match": bson.M{"seqs.1": bson.M{"$exists": 1}}},
    57  	})
    58  	pipe.Batch(1600)
    59  	return pipe.Iter()
    60  }
    61  
    62  // queueRemoval includes this sequence as one that has been superseded
    63  func (p *Pruner) queueRemoval(seq int64) {
    64  	p.toRemove = append(p.toRemove, docIDInt64(p.modelUUID, seq))
    65  }
    66  
    67  // flushRemovals makes sure that we've applied all desired removals
    68  func (p *Pruner) flushRemovals() error {
    69  	if len(p.toRemove) == 0 {
    70  		return nil
    71  	}
    72  	matched, err := p.beingsC.RemoveAll(bson.M{"_id": bson.M{"$in": p.toRemove}})
    73  	if err != nil {
    74  		return err
    75  	}
    76  	p.toRemove = p.toRemove[:0]
    77  	if matched.Removed > 0 {
    78  		p.removedCount += uint64(matched.Removed)
    79  	}
    80  	return err
    81  }
    82  
    83  func (p *Pruner) removeOldPings() error {
    84  	// now and now-period are both considered active slots, so we don't
    85  	// touch those. We also leave 2 more slots around
    86  	startTime := time.Now()
    87  	logger.Tracef("pruning %q for %q", p.pingsC.Name, p.modelUUID)
    88  	s := timeSlot(time.Now(), p.delta)
    89  	oldSlot := s - 3*period
    90  	res, err := p.pingsC.RemoveAll(bson.D{{"_id", bson.RegEx{"^" + p.modelUUID, ""}},
    91  		{"slot", bson.M{"$lt": oldSlot}}})
    92  	if err != nil && err != mgo.ErrNotFound {
    93  		logger.Errorf("error removing old entries from %q: %v", p.pingsC.Name, err)
    94  		return err
    95  	}
    96  	logger.Debugf("pruned %q for %q of %d old pings in %v",
    97  		p.pingsC.Name, p.modelUUID, res.Removed, time.Since(startTime))
    98  	return nil
    99  }
   100  
   101  func (p *Pruner) removeUnusedBeings(memCache map[int64]string) error {
   102  	var keyInfo collapsedBeingsInfo
   103  	seqSet, err := p.findActiveSeqs()
   104  	if err != nil {
   105  		return err
   106  	}
   107  	logger.Tracef("pruning %q for %q starting", p.beingsC.Name, p.modelUUID)
   108  	startTime := time.Now()
   109  	keyCount := 0
   110  	seqCount := 0
   111  	iter := p.iterKeys()
   112  	defer iter.Close()
   113  	for iter.Next(&keyInfo) {
   114  		keyCount += 1
   115  		// Find the max
   116  		maxSeq := int64(-1)
   117  		for _, seq := range keyInfo.Seqs {
   118  			if seq > maxSeq {
   119  				maxSeq = seq
   120  			}
   121  		}
   122  		// Queue everything < max to be deleted
   123  		for _, seq := range keyInfo.Seqs {
   124  			seqCount++
   125  			_, isActive := seqSet[seq]
   126  			if seq >= maxSeq || isActive {
   127  				// It shouldn't be possible to be > at this point
   128  				continue
   129  			}
   130  			p.queueRemoval(seq)
   131  			if len(p.toRemove) > p.maxQueue {
   132  				if err := p.flushRemovals(); err != nil {
   133  					return err
   134  				}
   135  			}
   136  		}
   137  	}
   138  	if err := p.flushRemovals(); err != nil {
   139  		return err
   140  	}
   141  	if err := iter.Close(); err != nil {
   142  		return err
   143  	}
   144  	// now for the memory cache, also clear out any keys that aren't in the active set.
   145  	for seq := range memCache {
   146  		if _, isActive := seqSet[seq]; !isActive {
   147  			delete(memCache, seq)
   148  		}
   149  	}
   150  	logger.Debugf("pruned %q for %q of %d sequence keys (evaluated %d) from %d keys in %v",
   151  		p.beingsC.Name, p.modelUUID, p.removedCount, seqCount, keyCount, time.Since(startTime))
   152  	return nil
   153  }
   154  
   155  func (p *Pruner) findActiveSeqs() (map[int64]struct{}, error) {
   156  	// After pruning old pings, we now track all sequences which are still alive.
   157  	var infos []pingInfo
   158  	err := p.pingsC.Find(nil).All(&infos)
   159  	if err != nil {
   160  		return nil, err
   161  	}
   162  	maps := make([]map[string]int64, 0, len(infos)*2)
   163  	for _, ping := range infos {
   164  		maps = append(maps, ping.Alive)
   165  		maps = append(maps, ping.Dead)
   166  	}
   167  	seqs, err := decompressPings(maps)
   168  	if err != nil {
   169  		return nil, err
   170  	}
   171  	seqSet := make(map[int64]struct{})
   172  	for _, seq := range seqs {
   173  		seqSet[seq] = struct{}{}
   174  	}
   175  	return seqSet, nil
   176  }
   177  
   178  // Prune removes beings from the beings collection that have been superseded by
   179  // another entry with a higher sequence.
   180  // It also removes pings that are outside of the 'active' range
   181  // (the last few slots)
   182  func (p *Pruner) Prune(memCache map[int64]string) error {
   183  	err := p.removeOldPings()
   184  	if err != nil {
   185  		return errors.Trace(err)
   186  	}
   187  	err = p.removeUnusedBeings(memCache)
   188  	if err != nil {
   189  		return errors.Trace(err)
   190  	}
   191  	return nil
   192  }
   193  
   194  // NewPruner returns an object that is ready to prune the Beings collection
   195  // of old beings sequence entries that we no longer need.
   196  func NewPruner(modelUUID string, beings *mgo.Collection, pings *mgo.Collection, delta time.Duration) *Pruner {
   197  	return &Pruner{
   198  		modelUUID: modelUUID,
   199  		beingsC:   beings,
   200  		maxQueue:  1000,
   201  		pingsC:    pings,
   202  		delta:     delta,
   203  	}
   204  }