github.imxd.top/hashicorp/consul@v1.4.5/agent/consul/state/tombstone_gc.go (about) 1 package state 2 3 import ( 4 "fmt" 5 "sync" 6 "time" 7 ) 8 9 // TombstoneGC is used to track creation of tombstones so that they can be 10 // garbage collected after their TTL expires. The tombstones allow queries to 11 // provide monotonic index values within the TTL window. The GC is used to 12 // prevent monotonic growth in storage usage. This is a trade off between the 13 // length of the TTL and the storage overhead. 14 // 15 // In practice, this is required to fix the issue of delete visibility. When 16 // data is deleted from the KV store, the "latest" row can go backwards if the 17 // newest row is removed. The tombstones provide a way to ensure time doesn't 18 // move backwards within some interval. 19 // 20 type TombstoneGC struct { 21 // ttl sets the TTL for tombstones. 22 ttl time.Duration 23 24 // granularity determines how we bin TTLs into timers. 25 granularity time.Duration 26 27 // enabled controls if we actually setup any timers. 28 enabled bool 29 30 // expires maps the time of expiration to the highest tombstone value 31 // that should be expired. 32 expires map[time.Time]*expireInterval 33 34 // expireCh is used to stream expiration to the leader for processing. 35 expireCh chan uint64 36 37 sync.Mutex 38 } 39 40 // expireInterval is used to track the maximum index to expire in a given 41 // interval with a timer. 42 type expireInterval struct { 43 // maxIndex has the highest tombstone index that should be GC-d. 44 maxIndex uint64 45 46 // timer is the timer tracking this bin. 47 timer *time.Timer 48 } 49 50 // NewTombstoneGC is used to construct a new TombstoneGC given a TTL for 51 // tombstones and a tracking granularity. Longer TTLs ensure correct behavior 52 // for more time, but use more storage. A shorter granularity increases the 53 // number of Raft transactions and reduce how far past the TTL we perform GC. 54 func NewTombstoneGC(ttl, granularity time.Duration) (*TombstoneGC, error) { 55 // Sanity check the inputs 56 if ttl <= 0 || granularity <= 0 { 57 return nil, fmt.Errorf("Tombstone TTL and granularity must be positive") 58 } 59 60 t := &TombstoneGC{ 61 ttl: ttl, 62 granularity: granularity, 63 expires: make(map[time.Time]*expireInterval), 64 expireCh: make(chan uint64, 1), 65 } 66 return t, nil 67 } 68 69 // ExpireCh is used to return a channel that streams the next index that should 70 // be expired. 71 func (t *TombstoneGC) ExpireCh() <-chan uint64 { 72 return t.expireCh 73 } 74 75 // SetEnabled is used to control if the tombstone GC is 76 // enabled. Should only be enabled by the leader node. 77 func (t *TombstoneGC) SetEnabled(enabled bool) { 78 t.Lock() 79 defer t.Unlock() 80 if enabled == t.enabled { 81 return 82 } 83 84 // Stop all the timers and clear 85 if !enabled { 86 for _, exp := range t.expires { 87 exp.timer.Stop() 88 } 89 t.expires = make(map[time.Time]*expireInterval) 90 } 91 92 // Update the status 93 t.enabled = enabled 94 } 95 96 // Hint is used to indicate that keys at the given index have been 97 // deleted, and that their GC should be scheduled. 98 func (t *TombstoneGC) Hint(index uint64) { 99 expires := t.nextExpires() 100 101 t.Lock() 102 defer t.Unlock() 103 if !t.enabled { 104 return 105 } 106 107 // Check for an existing expiration timer and bump its index if we 108 // find one. 109 exp, ok := t.expires[expires] 110 if ok { 111 if index > exp.maxIndex { 112 exp.maxIndex = index 113 } 114 return 115 } 116 117 // Create a new expiration timer. 118 t.expires[expires] = &expireInterval{ 119 maxIndex: index, 120 timer: time.AfterFunc(expires.Sub(time.Now()), func() { 121 t.expireTime(expires) 122 }), 123 } 124 } 125 126 // PendingExpiration is used to check if any expirations are pending. 127 func (t *TombstoneGC) PendingExpiration() bool { 128 t.Lock() 129 defer t.Unlock() 130 131 return len(t.expires) > 0 132 } 133 134 // nextExpires is used to calculate the next expiration time, based on the 135 // granularity that is set. This allows us to bin expirations and avoid a ton 136 // of timers. 137 func (t *TombstoneGC) nextExpires() time.Time { 138 // The Round(0) call here is to shed the monotonic time so that we 139 // can safely use these as map keys. See #3670 for more details. 140 expires := time.Now().Add(t.ttl).Round(0) 141 remain := expires.UnixNano() % int64(t.granularity) 142 adj := expires.Add(t.granularity - time.Duration(remain)) 143 return adj 144 } 145 146 // purgeBin gets the index for the given bin and then deletes the bin. If there 147 // is no bin then this will return 0 for the index, which is ok. 148 func (t *TombstoneGC) purgeBin(expires time.Time) uint64 { 149 t.Lock() 150 defer t.Unlock() 151 152 // Get the maximum index and clear the entry. It's possible that the GC 153 // has been shut down while this timer fired and got blocked on the lock, 154 // so if there's nothing in the map for us we just exit out since there 155 // is no work to do. 156 exp, ok := t.expires[expires] 157 if !ok { 158 return 0 159 } 160 delete(t.expires, expires) 161 return exp.maxIndex 162 } 163 164 // expireTime is used to expire the entries at the given time. 165 func (t *TombstoneGC) expireTime(expires time.Time) { 166 // This is careful to take the lock only while we are fetching the index 167 // since the channel write might get blocked for reasons that could also 168 // need to hint GC (see #3700). 169 if index := t.purgeBin(expires); index > 0 { 170 t.expireCh <- index 171 } 172 }