github.com/clly/consul@v1.4.5/agent/consul/state/tombstone_gc.go (about)

     1  package state
     2  
     3  import (
     4  	"fmt"
     5  	"sync"
     6  	"time"
     7  )
     8  
     9  // TombstoneGC is used to track creation of tombstones so that they can be
    10  // garbage collected after their TTL expires. The tombstones allow queries to
    11  // provide monotonic index values within the TTL window. The GC is used to
    12  // prevent monotonic growth in storage usage. This is a trade off between the
    13  // length of the TTL and the storage overhead.
    14  //
    15  // In practice, this is required to fix the issue of delete visibility. When
    16  // data is deleted from the KV store, the "latest" row can go backwards if the
    17  // newest row is removed. The tombstones provide a way to ensure time doesn't
    18  // move backwards within some interval.
    19  //
    20  type TombstoneGC struct {
    21  	// ttl sets the TTL for tombstones.
    22  	ttl time.Duration
    23  
    24  	// granularity determines how we bin TTLs into timers.
    25  	granularity time.Duration
    26  
    27  	// enabled controls if we actually setup any timers.
    28  	enabled bool
    29  
    30  	// expires maps the time of expiration to the highest tombstone value
    31  	// that should be expired.
    32  	expires map[time.Time]*expireInterval
    33  
    34  	// expireCh is used to stream expiration to the leader for processing.
    35  	expireCh chan uint64
    36  
    37  	sync.Mutex
    38  }
    39  
    40  // expireInterval is used to track the maximum index to expire in a given
    41  // interval with a timer.
    42  type expireInterval struct {
    43  	// maxIndex has the highest tombstone index that should be GC-d.
    44  	maxIndex uint64
    45  
    46  	// timer is the timer tracking this bin.
    47  	timer *time.Timer
    48  }
    49  
    50  // NewTombstoneGC is used to construct a new TombstoneGC given a TTL for
    51  // tombstones and a tracking granularity. Longer TTLs ensure correct behavior
    52  // for more time, but use more storage. A shorter granularity increases the
    53  // number of Raft transactions and reduce how far past the TTL we perform GC.
    54  func NewTombstoneGC(ttl, granularity time.Duration) (*TombstoneGC, error) {
    55  	// Sanity check the inputs
    56  	if ttl <= 0 || granularity <= 0 {
    57  		return nil, fmt.Errorf("Tombstone TTL and granularity must be positive")
    58  	}
    59  
    60  	t := &TombstoneGC{
    61  		ttl:         ttl,
    62  		granularity: granularity,
    63  		expires:     make(map[time.Time]*expireInterval),
    64  		expireCh:    make(chan uint64, 1),
    65  	}
    66  	return t, nil
    67  }
    68  
    69  // ExpireCh is used to return a channel that streams the next index that should
    70  // be expired.
    71  func (t *TombstoneGC) ExpireCh() <-chan uint64 {
    72  	return t.expireCh
    73  }
    74  
    75  // SetEnabled is used to control if the tombstone GC is
    76  // enabled. Should only be enabled by the leader node.
    77  func (t *TombstoneGC) SetEnabled(enabled bool) {
    78  	t.Lock()
    79  	defer t.Unlock()
    80  	if enabled == t.enabled {
    81  		return
    82  	}
    83  
    84  	// Stop all the timers and clear
    85  	if !enabled {
    86  		for _, exp := range t.expires {
    87  			exp.timer.Stop()
    88  		}
    89  		t.expires = make(map[time.Time]*expireInterval)
    90  	}
    91  
    92  	// Update the status
    93  	t.enabled = enabled
    94  }
    95  
    96  // Hint is used to indicate that keys at the given index have been
    97  // deleted, and that their GC should be scheduled.
    98  func (t *TombstoneGC) Hint(index uint64) {
    99  	expires := t.nextExpires()
   100  
   101  	t.Lock()
   102  	defer t.Unlock()
   103  	if !t.enabled {
   104  		return
   105  	}
   106  
   107  	// Check for an existing expiration timer and bump its index if we
   108  	// find one.
   109  	exp, ok := t.expires[expires]
   110  	if ok {
   111  		if index > exp.maxIndex {
   112  			exp.maxIndex = index
   113  		}
   114  		return
   115  	}
   116  
   117  	// Create a new expiration timer.
   118  	t.expires[expires] = &expireInterval{
   119  		maxIndex: index,
   120  		timer: time.AfterFunc(expires.Sub(time.Now()), func() {
   121  			t.expireTime(expires)
   122  		}),
   123  	}
   124  }
   125  
   126  // PendingExpiration is used to check if any expirations are pending.
   127  func (t *TombstoneGC) PendingExpiration() bool {
   128  	t.Lock()
   129  	defer t.Unlock()
   130  
   131  	return len(t.expires) > 0
   132  }
   133  
   134  // nextExpires is used to calculate the next expiration time, based on the
   135  // granularity that is set. This allows us to bin expirations and avoid a ton
   136  // of timers.
   137  func (t *TombstoneGC) nextExpires() time.Time {
   138  	// The Round(0) call here is to shed the monotonic time so that we
   139  	// can safely use these as map keys. See #3670 for more details.
   140  	expires := time.Now().Add(t.ttl).Round(0)
   141  	remain := expires.UnixNano() % int64(t.granularity)
   142  	adj := expires.Add(t.granularity - time.Duration(remain))
   143  	return adj
   144  }
   145  
   146  // purgeBin gets the index for the given bin and then deletes the bin. If there
   147  // is no bin then this will return 0 for the index, which is ok.
   148  func (t *TombstoneGC) purgeBin(expires time.Time) uint64 {
   149  	t.Lock()
   150  	defer t.Unlock()
   151  
   152  	// Get the maximum index and clear the entry. It's possible that the GC
   153  	// has been shut down while this timer fired and got blocked on the lock,
   154  	// so if there's nothing in the map for us we just exit out since there
   155  	// is no work to do.
   156  	exp, ok := t.expires[expires]
   157  	if !ok {
   158  		return 0
   159  	}
   160  	delete(t.expires, expires)
   161  	return exp.maxIndex
   162  }
   163  
   164  // expireTime is used to expire the entries at the given time.
   165  func (t *TombstoneGC) expireTime(expires time.Time) {
   166  	// This is careful to take the lock only while we are fetching the index
   167  	// since the channel write might get blocked for reasons that could also
   168  	// need to hint GC (see #3700).
   169  	if index := t.purgeBin(expires); index > 0 {
   170  		t.expireCh <- index
   171  	}
   172  }