github.com/looshlee/beatles@v0.0.0-20220727174639-742810ab631c/pkg/kvstore/allocator/allocator.go (about) 1 // Copyright 2016-2020 Authors of Cilium 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package allocator 16 17 import ( 18 "context" 19 "fmt" 20 "path" 21 "strconv" 22 "strings" 23 "time" 24 25 "github.com/cilium/cilium/pkg/allocator" 26 "github.com/cilium/cilium/pkg/idpool" 27 "github.com/cilium/cilium/pkg/kvstore" 28 "github.com/cilium/cilium/pkg/logging" 29 "github.com/cilium/cilium/pkg/logging/logfields" 30 31 "github.com/sirupsen/logrus" 32 ) 33 34 var ( 35 log = logging.DefaultLogger.WithField(logfields.LogSubsys, "kvstorebackend") 36 ) 37 38 const ( 39 // maxAllocAttempts is the number of attempted allocation requests 40 // performed before failing. 41 maxAllocAttempts = 16 42 43 // listTimeout is the time to wait for the initial list operation to 44 // succeed when creating a new allocator 45 listTimeout = 3 * time.Minute 46 ) 47 48 // kvstoreBackend is an implentaton of pkg/allocator.Backend. It store 49 // identities in the following format: 50 // 51 // Slave keys: 52 // Slave keys are owned by individual nodes: 53 // - basePath/value/key1/node1 => 1001 54 // - basePath/value/key1/node2 => 1001 55 // - basePath/value/key2/node1 => 1002 56 // - basePath/value/key2/node2 => 1002 57 // 58 // If at least one key exists with the prefix basePath/value/keyN then that 59 // key must be considered to be in use in the allocation space. 60 // 61 // Slave keys are protected by a lease and will automatically get removed 62 // after ~ option.Config.KVstoreLeaseTTL if the node does not renew in time. 63 // 64 // Master key: 65 // - basePath/id/1001 => key1 66 // - basePath/id/1002 => key2 67 // 68 // Master keys provide the mapping from ID to key. As long as a master key 69 // for an ID exists, the ID is still in use. However, if a master key is no 70 // longer backed by at least one slave key, the garbage collector will 71 // eventually release the master key and return it back to the pool. 72 type kvstoreBackend struct { 73 // lockless is true if allocation can be done lockless. This depends on 74 // the underlying kvstore backend 75 lockless bool 76 77 // basePrefix is the prefix in the kvstore that all keys share which 78 // are being managed by this allocator. The basePrefix typically 79 // consists of something like: "space/project/allocatorName" 80 basePrefix string 81 82 // idPrefix is the kvstore key prefix for all master keys. It is being 83 // derived from the basePrefix. 84 idPrefix string 85 86 // valuePrefix is the kvstore key prefix for all slave keys. It is 87 // being derived from the basePrefix. 88 valuePrefix string 89 90 // lockPrefix is the prefix to use for all kvstore locks. This prefix 91 // is different from the idPrefix and valuePrefix to simplify watching 92 // for ID and key changes. 93 lockPrefix string 94 95 // suffix is the suffix attached to keys which must be node specific, 96 // this is typical set to the node's IP address 97 suffix string 98 99 // deleteInvalidPrefixes enables deletion of identities outside of the 100 // valid prefix 101 deleteInvalidPrefixes bool 102 103 backend kvstore.BackendOperations 104 105 keyType allocator.AllocatorKey 106 } 107 108 func locklessCapability(backend kvstore.BackendOperations) bool { 109 required := kvstore.CapabilityCreateIfExists | kvstore.CapabilityDeleteOnZeroCount 110 return backend.GetCapabilities()&required == required 111 } 112 113 func prefixMatchesKey(prefix, key string) bool { 114 // cilium/state/identities/v1/value/label;foo;bar;/172.0.124.60 115 lastSlash := strings.LastIndex(key, "/") 116 return len(prefix) == lastSlash 117 } 118 119 // NewKVStoreBackend creates a pkg/allocator.Backend compatible instance. The 120 // specific kvstore used is configured in pkg/kvstore. 121 func NewKVStoreBackend(basePath, suffix string, typ allocator.AllocatorKey, backend kvstore.BackendOperations) (*kvstoreBackend, error) { 122 if backend == nil { 123 return nil, fmt.Errorf("kvstore client not configured") 124 } 125 126 return &kvstoreBackend{ 127 basePrefix: basePath, 128 idPrefix: path.Join(basePath, "id"), 129 valuePrefix: path.Join(basePath, "value"), 130 lockPrefix: path.Join(basePath, "locks"), 131 suffix: suffix, 132 keyType: typ, 133 lockless: locklessCapability(backend), 134 backend: backend, 135 }, nil 136 } 137 138 // lockPath locks a key in the scope of an allocator 139 func (k *kvstoreBackend) lockPath(ctx context.Context, key string) (*kvstore.Lock, error) { 140 suffix := strings.TrimPrefix(key, k.basePrefix) 141 return kvstore.LockPath(ctx, k.backend, path.Join(k.lockPrefix, suffix)) 142 } 143 144 // DeleteAllKeys will delete all keys 145 func (k *kvstoreBackend) DeleteAllKeys() { 146 k.backend.DeletePrefix(k.basePrefix) 147 } 148 149 func (k *kvstoreBackend) encodeKey(key allocator.AllocatorKey) []byte { 150 return []byte(k.backend.Encode([]byte(key.GetKey()))) 151 } 152 153 // AllocateID allocates a key->ID mapping in the kvstore. 154 func (k *kvstoreBackend) AllocateID(ctx context.Context, id idpool.ID, key allocator.AllocatorKey) error { 155 // create /id/<ID> and fail if it already exists 156 keyPath := path.Join(k.idPrefix, id.String()) 157 success, err := k.backend.CreateOnly(ctx, keyPath, k.encodeKey(key), false) 158 if err != nil || !success { 159 return fmt.Errorf("unable to create master key '%s': %s", keyPath, err) 160 } 161 162 return nil 163 } 164 165 // AllocateID allocates a key->ID mapping in the kvstore. 166 func (k *kvstoreBackend) AllocateIDIfLocked(ctx context.Context, id idpool.ID, key allocator.AllocatorKey, lock kvstore.KVLocker) error { 167 // create /id/<ID> and fail if it already exists 168 keyPath := path.Join(k.idPrefix, id.String()) 169 success, err := k.backend.CreateOnlyIfLocked(ctx, keyPath, k.encodeKey(key), false, lock) 170 if err != nil || !success { 171 return fmt.Errorf("unable to create master key '%s': %s", keyPath, err) 172 } 173 174 return nil 175 } 176 177 // AcquireReference marks that this node is using this key->ID mapping in the kvstore. 178 func (k *kvstoreBackend) AcquireReference(ctx context.Context, id idpool.ID, key allocator.AllocatorKey, lock kvstore.KVLocker) error { 179 keyString := string(k.encodeKey(key)) 180 if err := k.createValueNodeKey(ctx, keyString, id, lock); err != nil { 181 return fmt.Errorf("unable to create slave key '%s': %s", keyString, err) 182 } 183 return nil 184 } 185 186 // createValueKey records that this "node" is using this key->ID 187 func (k *kvstoreBackend) createValueNodeKey(ctx context.Context, key string, newID idpool.ID, lock kvstore.KVLocker) error { 188 // add a new key /value/<key>/<node> to account for the reference 189 // The key is protected with a TTL/lease and will expire after LeaseTTL 190 valueKey := path.Join(k.valuePrefix, key, k.suffix) 191 if _, err := k.backend.UpdateIfDifferentIfLocked(ctx, valueKey, []byte(newID.String()), true, lock); err != nil { 192 return fmt.Errorf("unable to create value-node key '%s': %s", valueKey, err) 193 } 194 195 return nil 196 } 197 198 // Lock locks a key in the scope of an allocator 199 func (k *kvstoreBackend) lock(ctx context.Context, key string) (*kvstore.Lock, error) { 200 suffix := strings.TrimPrefix(key, k.basePrefix) 201 return kvstore.LockPath(ctx, k.backend, path.Join(k.lockPrefix, suffix)) 202 } 203 204 // Lock locks a key in the scope of an allocator 205 func (k *kvstoreBackend) Lock(ctx context.Context, key allocator.AllocatorKey) (kvstore.KVLocker, error) { 206 return k.lock(ctx, string(k.encodeKey(key))) 207 } 208 209 // Get returns the ID which is allocated to a key in the kvstore 210 func (k *kvstoreBackend) Get(ctx context.Context, key allocator.AllocatorKey) (idpool.ID, error) { 211 // ListPrefix() will return all keys matching the prefix, the prefix 212 // can cover multiple different keys, example: 213 // 214 // key1 := label1;label2; 215 // key2 := label1;label2;label3; 216 // 217 // In order to retrieve the correct key, the position of the last '/' 218 // is significant, e.g. 219 // 220 // prefix := cilium/state/identities/v1/value/label;foo; 221 // 222 // key1 := cilium/state/identities/v1/value/label;foo;/172.0.124.60 223 // key2 := cilium/state/identities/v1/value/label;foo;bar;/172.0.124.60 224 // 225 // Only key1 should match 226 prefix := path.Join(k.valuePrefix, string(k.encodeKey(key))) 227 pairs, err := k.backend.ListPrefix(prefix) 228 kvstore.Trace("ListPrefix", err, logrus.Fields{fieldPrefix: prefix, "entries": len(pairs)}) 229 if err != nil { 230 return 0, err 231 } 232 233 for k, v := range pairs { 234 if prefixMatchesKey(prefix, k) { 235 id, err := strconv.ParseUint(string(v.Data), 10, 64) 236 if err == nil { 237 return idpool.ID(id), nil 238 } 239 } 240 } 241 242 return idpool.NoID, nil 243 } 244 245 // GetIfLocked returns the ID which is allocated to a key in the kvstore 246 // if the client is still holding the given lock. 247 func (k *kvstoreBackend) GetIfLocked(ctx context.Context, key allocator.AllocatorKey, lock kvstore.KVLocker) (idpool.ID, error) { 248 // ListPrefixIfLocked() will return all keys matching the prefix, the prefix 249 // can cover multiple different keys, example: 250 // 251 // key1 := label1;label2; 252 // key2 := label1;label2;label3; 253 // 254 // In order to retrieve the correct key, the position of the last '/' 255 // is significant, e.g. 256 // 257 // prefix := cilium/state/identities/v1/value/label;foo; 258 // 259 // key1 := cilium/state/identities/v1/value/label;foo;/172.0.124.60 260 // key2 := cilium/state/identities/v1/value/label;foo;bar;/172.0.124.60 261 // 262 // Only key1 should match 263 prefix := path.Join(k.valuePrefix, string(k.encodeKey(key))) 264 pairs, err := k.backend.ListPrefixIfLocked(prefix, lock) 265 kvstore.Trace("ListPrefixLocked", err, logrus.Fields{fieldPrefix: prefix, "entries": len(pairs)}) 266 if err != nil { 267 return 0, err 268 } 269 270 for k, v := range pairs { 271 if prefixMatchesKey(prefix, k) { 272 id, err := strconv.ParseUint(string(v.Data), 10, 64) 273 if err == nil { 274 return idpool.ID(id), nil 275 } 276 } 277 } 278 279 return idpool.NoID, nil 280 } 281 282 // GetByID returns the key associated with an ID. Returns nil if no key is 283 // associated with the ID. 284 func (k *kvstoreBackend) GetByID(id idpool.ID) (allocator.AllocatorKey, error) { 285 v, err := k.backend.Get(path.Join(k.idPrefix, id.String())) 286 if err != nil { 287 return nil, err 288 } 289 290 s, err := k.backend.Decode(string(v)) 291 if err != nil { 292 return nil, err 293 } 294 295 return k.keyType.PutKey(string(s)), nil 296 } 297 298 // UpdateKey refreshes the record that this node is using this key -> id 299 // mapping. When reliablyMissing is set it will also recreate missing master or 300 // slave keys. 301 func (k *kvstoreBackend) UpdateKey(ctx context.Context, id idpool.ID, key allocator.AllocatorKey, reliablyMissing bool) error { 302 var ( 303 err error 304 recreated bool 305 keyPath = path.Join(k.idPrefix, id.String()) 306 valueKey = path.Join(k.valuePrefix, string(k.encodeKey(key)), k.suffix) 307 ) 308 309 // Use of CreateOnly() ensures that any existing potentially 310 // conflicting key is never overwritten. 311 success, err := k.backend.CreateOnly(ctx, keyPath, k.encodeKey(key), false) 312 switch { 313 case err != nil: 314 return fmt.Errorf("Unable to re-create missing master key \"%s\" -> \"%s\": %s", fieldKey, valueKey, err) 315 case success: 316 log.WithField(fieldKey, keyPath).Warning("Re-created missing master key") 317 } 318 319 // Also re-create the slave key in case it has been deleted. This will 320 // ensure that the next garbage collection cycle of any participating 321 // node does not remove the master key again. 322 if reliablyMissing { 323 recreated, err = k.backend.CreateOnly(ctx, valueKey, []byte(id.String()), true) 324 } else { 325 recreated, err = k.backend.UpdateIfDifferent(ctx, valueKey, []byte(id.String()), true) 326 } 327 switch { 328 case err != nil: 329 return fmt.Errorf("Unable to re-create missing slave key \"%s\" -> \"%s\": %s", fieldKey, valueKey, err) 330 case recreated: 331 log.WithField(fieldKey, valueKey).Warning("Re-created missing slave key") 332 } 333 334 return nil 335 } 336 337 // UpdateKeyIfLocked refreshes the record that this node is using this key -> id 338 // mapping. When reliablyMissing is set it will also recreate missing master or 339 // slave keys. 340 func (k *kvstoreBackend) UpdateKeyIfLocked(ctx context.Context, id idpool.ID, key allocator.AllocatorKey, reliablyMissing bool, lock kvstore.KVLocker) error { 341 var ( 342 err error 343 recreated bool 344 keyPath = path.Join(k.idPrefix, id.String()) 345 valueKey = path.Join(k.valuePrefix, string(k.encodeKey(key)), k.suffix) 346 ) 347 348 // Use of CreateOnly() ensures that any existing potentially 349 // conflicting key is never overwritten. 350 success, err := k.backend.CreateOnlyIfLocked(ctx, keyPath, k.encodeKey(key), false, lock) 351 switch { 352 case err != nil: 353 return fmt.Errorf("Unable to re-create missing master key \"%s\" -> \"%s\": %s", fieldKey, valueKey, err) 354 case success: 355 log.WithField(fieldKey, keyPath).Warning("Re-created missing master key") 356 } 357 358 // Also re-create the slave key in case it has been deleted. This will 359 // ensure that the next garbage collection cycle of any participating 360 // node does not remove the master key again. 361 // lock is ignored since the key doesn't exist. 362 if reliablyMissing { 363 recreated, err = k.backend.CreateOnly(ctx, valueKey, []byte(id.String()), true) 364 } else { 365 recreated, err = k.backend.UpdateIfDifferentIfLocked(ctx, valueKey, []byte(id.String()), true, lock) 366 } 367 switch { 368 case err != nil: 369 return fmt.Errorf("Unable to re-create missing slave key \"%s\" -> \"%s\": %s", fieldKey, valueKey, err) 370 case recreated: 371 log.WithField(fieldKey, valueKey).Warning("Re-created missing slave key") 372 } 373 374 return nil 375 } 376 377 // Release releases the use of an ID associated with the provided key. It does 378 // not guard against concurrent releases. This is currently guarded by 379 // Allocator.slaveKeysMutex when called from pkg/allocator.Allocator.Release. 380 func (k *kvstoreBackend) Release(ctx context.Context, _ idpool.ID, key allocator.AllocatorKey) (err error) { 381 log.WithField(fieldKey, key).Info("Releasing key") 382 valueKey := path.Join(k.valuePrefix, string(k.encodeKey(key)), k.suffix) 383 log.WithField(fieldKey, key).Info("Released last local use of key, invoking global release") 384 385 // does not need to be deleted with a lock as its protected by the 386 // Allocator.slaveKeysMutex 387 if err := k.backend.Delete(valueKey); err != nil { 388 log.WithError(err).WithFields(logrus.Fields{fieldKey: key}).Warning("Ignoring node specific ID") 389 return err 390 } 391 392 // if k.lockless { 393 // FIXME: etcd 3.3 will make it possible to do a lockless 394 // cleanup of the ID and release it right away. For now we rely 395 // on the GC to kick in a release unused IDs. 396 // } 397 398 return nil 399 } 400 401 // RunLocksGC scans the kvstore for unused locks and removes them. Returns 402 // a map of locks that are currently being held, including the ones that have 403 // failed to be GCed. 404 func (k *kvstoreBackend) RunLocksGC(staleKeysPrevRound map[string]kvstore.Value) (map[string]kvstore.Value, error) { 405 // fetch list of all /../locks keys 406 allocated, err := k.backend.ListPrefix(k.lockPrefix) 407 if err != nil { 408 return nil, fmt.Errorf("list failed: %s", err) 409 } 410 411 staleKeys := map[string]kvstore.Value{} 412 413 // iterate over /../locks 414 for key, v := range allocated { 415 scopedLog := log.WithFields(logrus.Fields{ 416 fieldKey: key, 417 fieldLeaseID: fmt.Sprintf("%x", v.LeaseID), 418 }) 419 // Only delete if this key was previously marked as to be deleted 420 if modRev, ok := staleKeysPrevRound[key]; ok && 421 // comparing ModRevision ensures the same client is still holding 422 // this lock since the last GC was called. 423 modRev.ModRevision == v.ModRevision && 424 modRev.LeaseID == v.LeaseID && 425 modRev.SessionID == v.SessionID { 426 if err := k.backend.Delete(key); err == nil { 427 scopedLog.Warning("Forcefully removed distributed lock due to client staleness." + 428 " Please check the connectivity between the KVStore and the client with that lease ID.") 429 continue 430 } 431 scopedLog.WithError(err). 432 Warning("Unable to remove distributed lock due to client staleness." + 433 " Please check the connectivity between the KVStore and the client with that lease ID.") 434 } 435 // If the key was not found mark it to be delete in the next RunGC 436 staleKeys[key] = kvstore.Value{ 437 ModRevision: v.ModRevision, 438 LeaseID: v.LeaseID, 439 SessionID: v.SessionID, 440 } 441 } 442 443 return staleKeys, nil 444 } 445 446 // RunGC scans the kvstore for unused master keys and removes them 447 func (k *kvstoreBackend) RunGC(staleKeysPrevRound map[string]uint64) (map[string]uint64, error) { 448 // fetch list of all /id/ keys 449 allocated, err := k.backend.ListPrefix(k.idPrefix) 450 if err != nil { 451 return nil, fmt.Errorf("list failed: %s", err) 452 } 453 454 staleKeys := map[string]uint64{} 455 456 // iterate over /id/ 457 for key, v := range allocated { 458 // if k.lockless { 459 // FIXME: Add DeleteOnZeroCount support 460 // } 461 462 lock, err := k.lockPath(context.Background(), key) 463 if err != nil { 464 log.WithError(err).WithField(fieldKey, key).Warning("allocator garbage collector was unable to lock key") 465 continue 466 } 467 468 // fetch list of all /value/<key> keys 469 valueKeyPrefix := path.Join(k.valuePrefix, string(v.Data)) 470 pairs, err := k.backend.ListPrefixIfLocked(valueKeyPrefix, lock) 471 if err != nil { 472 log.WithError(err).WithField(fieldPrefix, valueKeyPrefix).Warning("allocator garbage collector was unable to list keys") 473 lock.Unlock() 474 continue 475 } 476 477 hasUsers := false 478 for prefix := range pairs { 479 if prefixMatchesKey(valueKeyPrefix, prefix) { 480 hasUsers = true 481 break 482 } 483 } 484 485 // if ID has no user, delete it 486 if !hasUsers { 487 scopedLog := log.WithFields(logrus.Fields{ 488 fieldKey: key, 489 fieldID: path.Base(key), 490 }) 491 // Only delete if this key was previously marked as to be deleted 492 if modRev, ok := staleKeysPrevRound[key]; ok && modRev == v.ModRevision { 493 if err := k.backend.DeleteIfLocked(key, lock); err != nil { 494 scopedLog.WithError(err).Warning("Unable to delete unused allocator master key") 495 } else { 496 scopedLog.Info("Deleted unused allocator master key") 497 } 498 } else { 499 // If the key was not found mark it to be delete in the next RunGC 500 staleKeys[key] = v.ModRevision 501 } 502 } 503 504 lock.Unlock() 505 } 506 507 return staleKeys, nil 508 } 509 510 func (k *kvstoreBackend) keyToID(key string) (id idpool.ID, err error) { 511 if !strings.HasPrefix(key, k.idPrefix) { 512 return idpool.NoID, fmt.Errorf("Found invalid key \"%s\" outside of prefix \"%s\"", key, k.idPrefix) 513 } 514 515 suffix := strings.TrimPrefix(key, k.idPrefix) 516 if suffix[0] == '/' { 517 suffix = suffix[1:] 518 } 519 520 idParsed, err := strconv.ParseUint(suffix, 10, 64) 521 if err != nil { 522 return idpool.NoID, fmt.Errorf("Cannot parse key suffix \"%s\"", suffix) 523 } 524 525 return idpool.ID(idParsed), nil 526 } 527 528 func (k *kvstoreBackend) ListAndWatch(handler allocator.CacheMutations, stopChan chan struct{}) { 529 watcher := k.backend.ListAndWatch(k.idPrefix, k.idPrefix, 512) 530 531 for { 532 select { 533 case event, ok := <-watcher.Events: 534 if !ok { 535 goto abort 536 } 537 if event.Typ == kvstore.EventTypeListDone { 538 handler.OnListDone() 539 continue 540 } 541 542 id, err := k.keyToID(event.Key) 543 switch { 544 case err != nil: 545 log.WithError(err).WithField(fieldKey, event.Key).Warning("Invalid key") 546 547 if k.deleteInvalidPrefixes { 548 k.backend.Delete(event.Key) 549 } 550 551 case id != idpool.NoID: 552 var key allocator.AllocatorKey 553 554 if len(event.Value) > 0 { 555 s, err := k.backend.Decode(string(event.Value)) 556 if err != nil { 557 log.WithError(err).WithFields(logrus.Fields{ 558 fieldKey: event.Key, 559 fieldValue: event.Value, 560 }).Warning("Unable to decode key value") 561 } else { 562 key = k.keyType.PutKey(string(s)) 563 } 564 } 565 566 switch event.Typ { 567 case kvstore.EventTypeCreate: 568 handler.OnAdd(id, key) 569 570 case kvstore.EventTypeModify: 571 handler.OnModify(id, key) 572 573 case kvstore.EventTypeDelete: 574 handler.OnDelete(id, key) 575 } 576 } 577 578 case <-stopChan: 579 goto abort 580 } 581 } 582 583 abort: 584 watcher.Stop() 585 } 586 587 func (k *kvstoreBackend) Status() (string, error) { 588 return k.backend.Status() 589 } 590 591 func (k *kvstoreBackend) Encode(v string) string { 592 return k.backend.Encode([]byte(v)) 593 }