// Package retention implements index retention policies used by the
// metastore index cleaner to decide which shards should be tombstoned.
package retention

import (
	"flag"
	"iter"
	"math"
	"slices"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/prometheus/common/model"
	"go.etcd.io/bbolt"

	metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1"
	indexstore "github.com/grafana/pyroscope/pkg/metastore/index/store"
)

// Policy determines which parts of the index should be retained or deleted.
type Policy interface {
	// CreateTombstones examines the provided partitions and returns tombstones
	// for shards that should be deleted according to the policy.
	CreateTombstones(*bbolt.Tx, iter.Seq[indexstore.Partition]) []*metastorev1.Tombstones
}

// Config holds the retention settings for a tenant (or the defaults).
type Config struct {
	// RetentionPeriod is how long data is kept before it becomes eligible
	// for deletion. Zero means the data is never deleted.
	RetentionPeriod model.Duration `yaml:"retention_period" doc:"hidden"`
}

// Overrides supplies the default retention configuration together with
// per-tenant overrides, keyed by tenant ID.
type Overrides interface {
	Retention() (defaults Config, overrides iter.Seq2[string, Config])
}

// RegisterFlags registers the retention flags and sets their defaults
// (31 days by default).
func (c *Config) RegisterFlags(f *flag.FlagSet) {
	c.RetentionPeriod = model.Duration(time.Hour * 24 * 31)
	f.Var(&c.RetentionPeriod, "retention-period", "Retention period for the data. 0 means data never deleted.")
}

// TimeBasedRetentionPolicy implements a retention policy based on time.
type TimeBasedRetentionPolicy struct {
	logger        log.Logger
	overrides     map[string]*marker // Per-tenant markers, keyed by tenant ID.
	gracePeriod   time.Duration      // Extra time added to a partition end before it is inspected.
	maxTombstones int                // Upper bound on tombstones produced per CreateTombstones call.

	markers       []*marker                 // All markers, sorted by timestamp ascending.
	defaultPeriod *marker                   // Marker for the default retention period; nil if retention is unlimited.
	tombstones    []*metastorev1.Tombstones // Output buffer, reused across calls.
}

// marker indicates the time before which data should be deleted.
// An empty tenantID denotes the default (tenant-agnostic) marker;
// a zero timestamp means no retention period (retain indefinitely).
type marker struct {
	tenantID  string
	timestamp time.Time
}

// NewTimeBasedRetentionPolicy builds a retention policy from the default
// configuration and the per-tenant overrides, evaluated relative to now.
// Markers are precomputed and sorted here so that CreateTombstones can
// binary-search them per partition.
func NewTimeBasedRetentionPolicy(
	logger log.Logger,
	overrides Overrides,
	maxTombstones int,
	gracePeriod time.Duration,
	now time.Time,
) *TimeBasedRetentionPolicy {
	defaults, tenantOverrides := overrides.Retention()
	rp := TimeBasedRetentionPolicy{
		logger:        logger,
		overrides:     make(map[string]*marker),
		tombstones:    make([]*metastorev1.Tombstones, 0, maxTombstones),
		maxTombstones: maxTombstones,
		gracePeriod:   gracePeriod,
	}
	// Markers indicate the time before which data should be deleted
	// for a given tenant.
	for tenantID, override := range tenantOverrides {
		// Overrides identical to the default add no information; the default
		// marker (created below) covers them.
		if defaults.RetentionPeriod == override.RetentionPeriod {
			continue
		}
		// An override is defined for the tenant, so we need to adjust the
		// retention period for it. By default, we assume that the retention
		// period is not defined, i.e. is infinite.
		var timestamp time.Time // Zero value means no retention period.
		if period := time.Duration(override.RetentionPeriod); period > 0 {
			timestamp = now.Add(-period)
		}
		m := &marker{
			tenantID:  tenantID,
			timestamp: timestamp,
		}
		rp.markers = append(rp.markers, m)
		rp.overrides[tenantID] = m
	}
	// The default retention period is handled separately: we won't create
	// the marker if the retention period is not set. This allows us to avoid
	// checking all partition tenant shards, and instead only check specific
	// tenants that have a defined retention policy.
	if defaults.RetentionPeriod > 0 {
		rp.defaultPeriod = &marker{timestamp: now.Add(-time.Duration(defaults.RetentionPeriod))}
		rp.markers = append(rp.markers, rp.defaultPeriod)
	}
	// It is fine if there are markers pointing to the same time: for example,
	// if an override is set explicitly for a tenant, but it matches the
	// default value.
	slices.SortFunc(rp.markers, func(a, b *marker) int {
		return a.timestamp.Compare(b.timestamp)
	})

	return &rp
}

// CreateTombstones implements Policy. It walks the partitions in iteration
// order and collects up to maxTombstones tombstones for shards whose data
// has outlived its retention period. The returned slice is owned by the
// policy and is reused (truncated) on every call.
func (rp *TimeBasedRetentionPolicy) CreateTombstones(tx *bbolt.Tx, partitions iter.Seq[indexstore.Partition]) []*metastorev1.Tombstones {
	if len(rp.markers) == 0 {
		level.Debug(rp.logger).Log("msg", "no retention policies defined, skipping")
		return nil
	}
	for _, m := range rp.markers {
		level.Debug(rp.logger).Log("msg", "found retention marker", "tenant", m.tenantID, "timestamp", m.timestamp)
	}
	rp.tombstones = rp.tombstones[:0]
	for p := range partitions {
		// Stop once the tombstone budget is exhausted.
		if len(rp.tombstones) >= rp.maxTombstones {
			break
		}
		// processPartition returns false when no further partition can
		// produce tombstones (or the budget is exhausted).
		if !rp.processPartition(tx, p) {
			break
		}
	}
	return rp.tombstones
}

// processPartition inspects a single partition and appends tombstones for
// its expired tenant shards. It reports whether iteration should continue
// with the next partition.
func (rp *TimeBasedRetentionPolicy) processPartition(tx *bbolt.Tx, p indexstore.Partition) bool {
	// We want to find the markers that are before the partition end, i.e. the
	// markers that indicate the time before which data should be deleted. For
	// tenants D and E we need to inspect the partition. Otherwise, if there
	// are no markers after the partition end, we stop.
	//
	//           | partition |
	//           | start end |                     t
	// ----------|----------------------x------------->
	//      *  *   *              *          *
	// markers: A  B   C              D          E
	//
	// Note that we also add a grace period to the partition end time, so that
	// we won't be checking it for this period. Since tombstones are only
	// created after the shard max time is before the marker timestamp, and the
	// distance between them might be large (hours), we would be wasting time
	// if we were inspecting the partition right away.
	partitionEnd := &marker{timestamp: p.EndTime().Add(rp.gracePeriod)}
	logger := log.With(rp.logger, "partition", p.String())
	level.Debug(logger).Log(
		"msg", "processing partition",
		"partition_end_marker", partitionEnd.timestamp,
		"retention_markers", len(rp.markers),
	)

	// Find the first marker at or after the partition end (markers are
	// sorted ascending by timestamp).
	i, _ := slices.BinarySearchFunc(rp.markers, partitionEnd, func(a, b *marker) int {
		return a.timestamp.Compare(b.timestamp)
	})
	if i >= len(rp.markers) {
		// All markers are before the partition end: it can't be deleted.
		// We can stop here: no partitions after this one will have deletion
		// markers that are before the partition end.
		level.Debug(logger).Log("msg", "partition has not passed the retention period, skipping")
		return false
	}

	q := p.Query(tx)
	if q == nil {
		level.Warn(logger).Log("msg", "cannot find partition, skipping")
		return true
	}

	// The anonymous tenant is ignored here, we only collect tombstones for the
	// specific tenants, which have a defined retention policy.
	if rp.defaultPeriod == nil || rp.defaultPeriod.timestamp.Before(partitionEnd.timestamp) {
		// Fast path for the case when there are markers very far in the future
		// relatively the default marker, or no default marker at all.
		//
		// The default retention period has not expired yet, so we don't need
		// to inspect all the tenants. Instead, we can just examine the markers
		// that are after the partition end: these tenants have retention
		// period shorter than the default one. This is useful in case if the
		// tenant data is deleted by setting very short retention period: we
		// won't check each and every partition tenant shard.
		level.Debug(logger).Log("msg", "creating tombstones for tenant markers", "retention_markers", len(rp.markers[i:]))
		rp.createTombstonesForMarkers(q, rp.markers[i:])
	} else {
		// Otherwise, we need to inspect all the tenants in the partition.
		// There's no point in checking the markers: either most of them
		// will result in tombstones, or have already been deleted (e.g.,
		// there's one tenant with an infinite retention period).
		level.Debug(logger).Log("msg", "creating tombstones for partition tenants")
		rp.createTombstonesForTenants(q, partitionEnd)
	}

	// Finally, we need to check if the anonymous tenant has any tombstones to
	// collect. We only delete it if there are no other tenant shards in the
	// partition: this guarantees that we don't delete data that is still
	// needed, as we'd have the named tenant shards otherwise. Note that the
	// tombstones we created so far have not yet resulted in the deletion of
	// shards, so we will only delete the anonymous tenant on a second pass.
	//
	// NOTE(kolesnikovae):
	//
	// The approach may result in keeping the anonymous tenant data longer than
	// necessary, but it should not be a problem in practice, as we assume that
	// it contains no blocks: those are removed at L0 compaction. However, the
	// shard-level structures such as tenant-shard buckets and string tables
	// are not removed until the shard is deleted, which may also affect the
	// index size. Ideally, we should seal partitions (create checkpoints) at
	// some point that would protect it from modifications; then, we could
	// delete the anon tenant shards safely, if there's no uncompacted data.
	//
	// An alternative approach would be to mark anon shards as we remove blocks
	// from them, or create tombstones. We cannot do this as a side effect, to
	// avoid state drift between the replicas (although this is arguable – such
	// deletion is a side effect per se, and it only concerns the local state),
	// but we can find the marks during the cleanup job. That could be
	// implemented as a separate retention policy.
	rp.createTombstonesForAnonTenant(q)
	return len(rp.tombstones) < rp.maxTombstones
}

// createTombstonesForMarkers creates tombstones for each tenant-specific
// marker (the default, tenant-less marker is skipped). It stops early once
// the tombstone budget is exhausted.
func (rp *TimeBasedRetentionPolicy) createTombstonesForMarkers(q *indexstore.PartitionQuery, markers []*marker) {
	for _, m := range markers {
		// Skip the default marker: it carries no tenant ID.
		if m.tenantID == "" {
			continue
		}
		if !rp.createTombstones(q, m) {
			return
		}
	}
}

// createTombstonesForTenants inspects every named tenant in the partition,
// resolving each tenant's marker from the overrides or the default period,
// and creates tombstones for those whose retention has expired relative to
// the partition end. It stops early once the tombstone budget is exhausted.
func (rp *TimeBasedRetentionPolicy) createTombstonesForTenants(q *indexstore.PartitionQuery, partitionEnd *marker) {
	for tenantID := range q.Tenants() {
		// The anonymous tenant is handled separately.
		if tenantID == "" {
			continue
		}
		var m *marker
		if o, ok := rp.overrides[tenantID]; ok {
			m = o
		} else if rp.defaultPeriod != nil {
			// Tenant-specific marker using the default retention period.
			m = &marker{tenantID: tenantID, timestamp: rp.defaultPeriod.timestamp}
		} else {
			// No retention policy for this tenant, and no default:
			// we retain the data indefinitely.
			continue
		}
		// Only markers past the (grace-extended) partition end can yield
		// tombstones for this partition.
		if m.timestamp.After(partitionEnd.timestamp) {
			if !rp.createTombstones(q, m) {
				return
			}
		}
	}
}

// createTombstonesForAnonTenant creates tombstones for the anonymous tenant
// shard, but only when the partition contains no other tenant shards.
func (rp *TimeBasedRetentionPolicy) createTombstonesForAnonTenant(q *indexstore.PartitionQuery) {
	if rp.hasTenants(q) {
		// We have at least one tenant other than the anonymous one.
		// We cannot delete the anonymous tenant shard yet – continue.
		return
	}
	// Once shard max time passes the partition end time, we can
	// create tombstones for the anonymous tenant shard.
	level.Debug(rp.logger).Log("msg", "creating tombstones for anonymous tenant")
	// We want to bypass the timestamp check for the anonymous tenant:
	// we know that if all the other tenants have been processed, it's
	// safe to create tombstones for the anonymous tenant. The far-future
	// sentinel timestamp makes every shard pass the max-time check.
	rp.createTombstones(q, &marker{timestamp: time.UnixMilli(math.MaxInt64)})
}

// hasTenants reports whether the partition contains any tenant shard other
// than the anonymous ("") one. NOTE(review): assumes tenant IDs yielded by
// q.Tenants() are unique, so two entries imply at least one named tenant —
// confirm against the store implementation.
func (rp *TimeBasedRetentionPolicy) hasTenants(q *indexstore.PartitionQuery) bool {
	var n int
	for tenant := range q.Tenants() {
		n++
		if n > 1 || tenant != "" {
			return true
		}
	}
	return false
}

// createTombstones appends a tombstone for every shard of the marker's
// tenant whose max time (a Unix-nanosecond timestamp) is before the marker
// timestamp. It returns false when the tombstone budget is exhausted and
// the caller should stop.
func (rp *TimeBasedRetentionPolicy) createTombstones(q *indexstore.PartitionQuery, m *marker) bool {
	for shard := range q.Shards(m.tenantID) {
		if len(rp.tombstones) >= rp.maxTombstones {
			return false
		}
		maxTime := time.Unix(0, shard.ShardIndex.MaxTime)
		if maxTime.Before(m.timestamp) {
			// The shard does not contain data before the marker.
			name := shard.TombstoneName()
			level.Debug(rp.logger).Log("msg", "creating tombstone", "name", name)
			rp.tombstones = append(rp.tombstones, &metastorev1.Tombstones{
				Shard: &metastorev1.ShardTombstone{
					Name:      name,
					Timestamp: q.Timestamp.UnixNano(),
					Duration:  q.Duration.Nanoseconds(),
					Shard:     shard.Shard,
					Tenant:    shard.Tenant,
				},
			})
		}
	}
	return true
}