github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/chunk/purger/delete_requests_store.go (about) 1 package purger 2 3 import ( 4 "context" 5 "encoding/binary" 6 "encoding/hex" 7 "errors" 8 "flag" 9 "fmt" 10 "hash/fnv" 11 "strconv" 12 "strings" 13 "time" 14 15 "github.com/cortexproject/cortex/pkg/chunk" 16 17 "github.com/prometheus/common/model" 18 "github.com/prometheus/prometheus/pkg/labels" 19 ) 20 21 type ( 22 DeleteRequestStatus string 23 CacheKind string 24 indexType string 25 ) 26 27 const ( 28 StatusReceived DeleteRequestStatus = "received" 29 StatusBuildingPlan DeleteRequestStatus = "buildingPlan" 30 StatusDeleting DeleteRequestStatus = "deleting" 31 StatusProcessed DeleteRequestStatus = "processed" 32 33 separator = "\000" // separator for series selectors in delete requests 34 35 // CacheKindStore is for cache gen number for store cache 36 CacheKindStore CacheKind = "store" 37 // CacheKindResults is for cache gen number for results cache 38 CacheKindResults CacheKind = "results" 39 40 deleteRequestID indexType = "1" 41 deleteRequestDetails indexType = "2" 42 cacheGenNum indexType = "3" 43 ) 44 45 var ( 46 pendingDeleteRequestStatuses = []DeleteRequestStatus{StatusReceived, StatusBuildingPlan, StatusDeleting} 47 48 ErrDeleteRequestNotFound = errors.New("could not find matching delete request") 49 ) 50 51 // DeleteRequest holds all the details about a delete request. 52 type DeleteRequest struct { 53 RequestID string `json:"request_id"` 54 UserID string `json:"-"` 55 StartTime model.Time `json:"start_time"` 56 EndTime model.Time `json:"end_time"` 57 Selectors []string `json:"selectors"` 58 Status DeleteRequestStatus `json:"status"` 59 Matchers [][]*labels.Matcher `json:"-"` 60 CreatedAt model.Time `json:"created_at"` 61 } 62 63 // cacheGenNumbers holds store and results cache gen numbers for a user. 64 type cacheGenNumbers struct { 65 store, results string 66 } 67 68 // DeleteStore provides all the methods required to manage lifecycle of delete request and things related to it. 69 type DeleteStore struct { 70 cfg DeleteStoreConfig 71 indexClient chunk.IndexClient 72 } 73 74 // DeleteStoreConfig holds configuration for delete store. 75 type DeleteStoreConfig struct { 76 Store string `yaml:"store"` 77 RequestsTableName string `yaml:"requests_table_name"` 78 ProvisionConfig TableProvisioningConfig `yaml:"table_provisioning"` 79 } 80 81 // RegisterFlags adds the flags required to configure this flag set. 82 func (cfg *DeleteStoreConfig) RegisterFlags(f *flag.FlagSet) { 83 cfg.ProvisionConfig.RegisterFlags("deletes.table", f) 84 f.StringVar(&cfg.Store, "deletes.store", "", "Store for keeping delete request") 85 f.StringVar(&cfg.RequestsTableName, "deletes.requests-table-name", "delete_requests", "Name of the table which stores delete requests") 86 } 87 88 // NewDeleteStore creates a store for managing delete requests. 89 func NewDeleteStore(cfg DeleteStoreConfig, indexClient chunk.IndexClient) (*DeleteStore, error) { 90 ds := DeleteStore{ 91 cfg: cfg, 92 indexClient: indexClient, 93 } 94 95 return &ds, nil 96 } 97 98 // Add creates entries for a new delete request. 99 func (ds *DeleteStore) AddDeleteRequest(ctx context.Context, userID string, startTime, endTime model.Time, selectors []string) error { 100 return ds.addDeleteRequest(ctx, userID, model.Now(), startTime, endTime, selectors) 101 102 } 103 104 // addDeleteRequest is also used for tests to create delete requests with different createdAt time. 105 func (ds *DeleteStore) addDeleteRequest(ctx context.Context, userID string, createdAt, startTime, endTime model.Time, selectors []string) error { 106 requestID := generateUniqueID(userID, selectors) 107 108 for { 109 _, err := ds.GetDeleteRequest(ctx, userID, string(requestID)) 110 if err != nil { 111 if err == ErrDeleteRequestNotFound { 112 break 113 } 114 return err 115 } 116 117 // we have a collision here, lets recreate a new requestID and check for collision 118 time.Sleep(time.Millisecond) 119 requestID = generateUniqueID(userID, selectors) 120 } 121 122 // userID, requestID 123 userIDAndRequestID := fmt.Sprintf("%s:%s", userID, requestID) 124 125 // Add an entry with userID, requestID as range key and status as value to make it easy to manage and lookup status 126 // We don't want to set anything in hash key here since we would want to find delete requests by just status 127 writeBatch := ds.indexClient.NewWriteBatch() 128 writeBatch.Add(ds.cfg.RequestsTableName, string(deleteRequestID), []byte(userIDAndRequestID), []byte(StatusReceived)) 129 130 // Add another entry with additional details like creation time, time range of delete request and selectors in value 131 rangeValue := fmt.Sprintf("%x:%x:%x", int64(createdAt), int64(startTime), int64(endTime)) 132 writeBatch.Add(ds.cfg.RequestsTableName, fmt.Sprintf("%s:%s", deleteRequestDetails, userIDAndRequestID), 133 []byte(rangeValue), []byte(strings.Join(selectors, separator))) 134 135 // we update only cache gen number because only query responses are changing at this stage. 136 // we still have to query data from store for doing query time filtering and we don't want to invalidate its results now. 137 writeBatch.Add(ds.cfg.RequestsTableName, fmt.Sprintf("%s:%s:%s", cacheGenNum, userID, CacheKindResults), 138 []byte{}, []byte(strconv.FormatInt(time.Now().Unix(), 10))) 139 140 return ds.indexClient.BatchWrite(ctx, writeBatch) 141 } 142 143 // GetDeleteRequestsByStatus returns all delete requests for given status. 144 func (ds *DeleteStore) GetDeleteRequestsByStatus(ctx context.Context, status DeleteRequestStatus) ([]DeleteRequest, error) { 145 return ds.queryDeleteRequests(ctx, chunk.IndexQuery{ 146 TableName: ds.cfg.RequestsTableName, 147 HashValue: string(deleteRequestID), 148 ValueEqual: []byte(status), 149 }) 150 } 151 152 // GetDeleteRequestsForUserByStatus returns all delete requests for a user with given status. 153 func (ds *DeleteStore) GetDeleteRequestsForUserByStatus(ctx context.Context, userID string, status DeleteRequestStatus) ([]DeleteRequest, error) { 154 return ds.queryDeleteRequests(ctx, chunk.IndexQuery{ 155 TableName: ds.cfg.RequestsTableName, 156 HashValue: string(deleteRequestID), 157 RangeValuePrefix: []byte(userID), 158 ValueEqual: []byte(status), 159 }) 160 } 161 162 // GetAllDeleteRequestsForUser returns all delete requests for a user. 163 func (ds *DeleteStore) GetAllDeleteRequestsForUser(ctx context.Context, userID string) ([]DeleteRequest, error) { 164 return ds.queryDeleteRequests(ctx, chunk.IndexQuery{ 165 TableName: ds.cfg.RequestsTableName, 166 HashValue: string(deleteRequestID), 167 RangeValuePrefix: []byte(userID), 168 }) 169 } 170 171 // UpdateStatus updates status of a delete request. 172 func (ds *DeleteStore) UpdateStatus(ctx context.Context, userID, requestID string, newStatus DeleteRequestStatus) error { 173 userIDAndRequestID := fmt.Sprintf("%s:%s", userID, requestID) 174 175 writeBatch := ds.indexClient.NewWriteBatch() 176 writeBatch.Add(ds.cfg.RequestsTableName, string(deleteRequestID), []byte(userIDAndRequestID), []byte(newStatus)) 177 178 if newStatus == StatusProcessed { 179 // we have deleted data from store so invalidate cache only for store since we don't have to do runtime filtering anymore. 180 // we don't have to change cache gen number because we were anyways doing runtime filtering 181 writeBatch.Add(ds.cfg.RequestsTableName, fmt.Sprintf("%s:%s:%s", cacheGenNum, userID, CacheKindStore), []byte{}, []byte(strconv.FormatInt(time.Now().Unix(), 10))) 182 } 183 184 return ds.indexClient.BatchWrite(ctx, writeBatch) 185 } 186 187 // GetDeleteRequest returns delete request with given requestID. 188 func (ds *DeleteStore) GetDeleteRequest(ctx context.Context, userID, requestID string) (*DeleteRequest, error) { 189 userIDAndRequestID := fmt.Sprintf("%s:%s", userID, requestID) 190 191 deleteRequests, err := ds.queryDeleteRequests(ctx, chunk.IndexQuery{ 192 TableName: ds.cfg.RequestsTableName, 193 HashValue: string(deleteRequestID), 194 RangeValuePrefix: []byte(userIDAndRequestID), 195 }) 196 197 if err != nil { 198 return nil, err 199 } 200 201 if len(deleteRequests) == 0 { 202 return nil, ErrDeleteRequestNotFound 203 } 204 205 return &deleteRequests[0], nil 206 } 207 208 // GetPendingDeleteRequestsForUser returns all delete requests for a user which are not processed. 209 func (ds *DeleteStore) GetPendingDeleteRequestsForUser(ctx context.Context, userID string) ([]DeleteRequest, error) { 210 pendingDeleteRequests := []DeleteRequest{} 211 for _, status := range pendingDeleteRequestStatuses { 212 deleteRequests, err := ds.GetDeleteRequestsForUserByStatus(ctx, userID, status) 213 if err != nil { 214 return nil, err 215 } 216 217 pendingDeleteRequests = append(pendingDeleteRequests, deleteRequests...) 218 } 219 220 return pendingDeleteRequests, nil 221 } 222 223 func (ds *DeleteStore) queryDeleteRequests(ctx context.Context, deleteQuery chunk.IndexQuery) ([]DeleteRequest, error) { 224 deleteRequests := []DeleteRequest{} 225 // No need to lock inside the callback since we run a single index query. 226 err := ds.indexClient.QueryPages(ctx, []chunk.IndexQuery{deleteQuery}, func(query chunk.IndexQuery, batch chunk.ReadBatch) (shouldContinue bool) { 227 itr := batch.Iterator() 228 for itr.Next() { 229 userID, requestID := splitUserIDAndRequestID(string(itr.RangeValue())) 230 231 deleteRequests = append(deleteRequests, DeleteRequest{ 232 UserID: userID, 233 RequestID: requestID, 234 Status: DeleteRequestStatus(itr.Value()), 235 }) 236 } 237 return true 238 }) 239 if err != nil { 240 return nil, err 241 } 242 243 for i, deleteRequest := range deleteRequests { 244 deleteRequestQuery := []chunk.IndexQuery{ 245 { 246 TableName: ds.cfg.RequestsTableName, 247 HashValue: fmt.Sprintf("%s:%s:%s", deleteRequestDetails, deleteRequest.UserID, deleteRequest.RequestID), 248 }, 249 } 250 251 var parseError error 252 err := ds.indexClient.QueryPages(ctx, deleteRequestQuery, func(query chunk.IndexQuery, batch chunk.ReadBatch) (shouldContinue bool) { 253 itr := batch.Iterator() 254 itr.Next() 255 256 deleteRequest, err = parseDeleteRequestTimestamps(itr.RangeValue(), deleteRequest) 257 if err != nil { 258 parseError = err 259 return false 260 } 261 262 deleteRequest.Selectors = strings.Split(string(itr.Value()), separator) 263 deleteRequests[i] = deleteRequest 264 265 return true 266 }) 267 268 if err != nil { 269 return nil, err 270 } 271 272 if parseError != nil { 273 return nil, parseError 274 } 275 } 276 277 return deleteRequests, nil 278 } 279 280 // getCacheGenerationNumbers returns cache gen numbers for a user. 281 func (ds *DeleteStore) getCacheGenerationNumbers(ctx context.Context, userID string) (*cacheGenNumbers, error) { 282 storeCacheGen, err := ds.queryCacheGenerationNumber(ctx, userID, CacheKindStore) 283 if err != nil { 284 return nil, err 285 } 286 287 resultsCacheGen, err := ds.queryCacheGenerationNumber(ctx, userID, CacheKindResults) 288 if err != nil { 289 return nil, err 290 } 291 292 return &cacheGenNumbers{storeCacheGen, resultsCacheGen}, nil 293 } 294 295 func (ds *DeleteStore) queryCacheGenerationNumber(ctx context.Context, userID string, kind CacheKind) (string, error) { 296 query := chunk.IndexQuery{TableName: ds.cfg.RequestsTableName, HashValue: fmt.Sprintf("%s:%s:%s", cacheGenNum, userID, kind)} 297 298 genNumber := "" 299 err := ds.indexClient.QueryPages(ctx, []chunk.IndexQuery{query}, func(query chunk.IndexQuery, batch chunk.ReadBatch) (shouldContinue bool) { 300 itr := batch.Iterator() 301 for itr.Next() { 302 genNumber = string(itr.Value()) 303 break 304 } 305 return false 306 }) 307 308 if err != nil { 309 return "", err 310 } 311 312 return genNumber, nil 313 } 314 315 // RemoveDeleteRequest removes a delete request and increments cache gen number 316 func (ds *DeleteStore) RemoveDeleteRequest(ctx context.Context, userID, requestID string, createdAt, startTime, endTime model.Time) error { 317 userIDAndRequestID := fmt.Sprintf("%s:%s", userID, requestID) 318 319 writeBatch := ds.indexClient.NewWriteBatch() 320 writeBatch.Delete(ds.cfg.RequestsTableName, string(deleteRequestID), []byte(userIDAndRequestID)) 321 322 // Add another entry with additional details like creation time, time range of delete request and selectors in value 323 rangeValue := fmt.Sprintf("%x:%x:%x", int64(createdAt), int64(startTime), int64(endTime)) 324 writeBatch.Delete(ds.cfg.RequestsTableName, fmt.Sprintf("%s:%s", deleteRequestDetails, userIDAndRequestID), 325 []byte(rangeValue)) 326 327 // we need to invalidate results cache since removal of delete request would cause query results to change 328 writeBatch.Add(ds.cfg.RequestsTableName, fmt.Sprintf("%s:%s:%s", cacheGenNum, userID, CacheKindResults), 329 []byte{}, []byte(strconv.FormatInt(time.Now().Unix(), 10))) 330 331 return ds.indexClient.BatchWrite(ctx, writeBatch) 332 } 333 334 func parseDeleteRequestTimestamps(rangeValue []byte, deleteRequest DeleteRequest) (DeleteRequest, error) { 335 hexParts := strings.Split(string(rangeValue), ":") 336 if len(hexParts) != 3 { 337 return deleteRequest, errors.New("invalid key in parsing delete request lookup response") 338 } 339 340 createdAt, err := strconv.ParseInt(hexParts[0], 16, 64) 341 if err != nil { 342 return deleteRequest, err 343 } 344 345 from, err := strconv.ParseInt(hexParts[1], 16, 64) 346 if err != nil { 347 return deleteRequest, err 348 349 } 350 through, err := strconv.ParseInt(hexParts[2], 16, 64) 351 if err != nil { 352 return deleteRequest, err 353 354 } 355 356 deleteRequest.CreatedAt = model.Time(createdAt) 357 deleteRequest.StartTime = model.Time(from) 358 deleteRequest.EndTime = model.Time(through) 359 360 return deleteRequest, nil 361 } 362 363 // An id is useful in managing delete requests 364 func generateUniqueID(orgID string, selectors []string) []byte { 365 uniqueID := fnv.New32() 366 _, _ = uniqueID.Write([]byte(orgID)) 367 368 timeNow := make([]byte, 8) 369 binary.LittleEndian.PutUint64(timeNow, uint64(time.Now().UnixNano())) 370 _, _ = uniqueID.Write(timeNow) 371 372 for _, selector := range selectors { 373 _, _ = uniqueID.Write([]byte(selector)) 374 } 375 376 return encodeUniqueID(uniqueID.Sum32()) 377 } 378 379 func encodeUniqueID(t uint32) []byte { 380 throughBytes := make([]byte, 4) 381 binary.BigEndian.PutUint32(throughBytes, t) 382 encodedThroughBytes := make([]byte, 8) 383 hex.Encode(encodedThroughBytes, throughBytes) 384 return encodedThroughBytes 385 } 386 387 func splitUserIDAndRequestID(rangeValue string) (userID, requestID string) { 388 lastIndex := strings.LastIndex(rangeValue, ":") 389 390 userID = rangeValue[:lastIndex] 391 requestID = rangeValue[lastIndex+1:] 392 393 return 394 }