github.imxd.top/hashicorp/consul@v1.4.5/agent/consul/state/kvs.go (about) 1 package state 2 3 import ( 4 "fmt" 5 "strings" 6 "time" 7 8 "github.com/hashicorp/consul/agent/structs" 9 "github.com/hashicorp/go-memdb" 10 ) 11 12 // kvsTableSchema returns a new table schema used for storing key/value data for 13 // Consul's kv store. 14 func kvsTableSchema() *memdb.TableSchema { 15 return &memdb.TableSchema{ 16 Name: "kvs", 17 Indexes: map[string]*memdb.IndexSchema{ 18 "id": &memdb.IndexSchema{ 19 Name: "id", 20 AllowMissing: false, 21 Unique: true, 22 Indexer: &memdb.StringFieldIndex{ 23 Field: "Key", 24 Lowercase: false, 25 }, 26 }, 27 "session": &memdb.IndexSchema{ 28 Name: "session", 29 AllowMissing: true, 30 Unique: false, 31 Indexer: &memdb.UUIDFieldIndex{ 32 Field: "Session", 33 }, 34 }, 35 }, 36 } 37 } 38 39 // tombstonesTableSchema returns a new table schema used for storing tombstones 40 // during KV delete operations to prevent the index from sliding backwards. 41 func tombstonesTableSchema() *memdb.TableSchema { 42 return &memdb.TableSchema{ 43 Name: "tombstones", 44 Indexes: map[string]*memdb.IndexSchema{ 45 "id": &memdb.IndexSchema{ 46 Name: "id", 47 AllowMissing: false, 48 Unique: true, 49 Indexer: &memdb.StringFieldIndex{ 50 Field: "Key", 51 Lowercase: false, 52 }, 53 }, 54 }, 55 } 56 } 57 58 func init() { 59 registerSchema(kvsTableSchema) 60 registerSchema(tombstonesTableSchema) 61 } 62 63 // KVs is used to pull the full list of KVS entries for use during snapshots. 64 func (s *Snapshot) KVs() (memdb.ResultIterator, error) { 65 iter, err := s.tx.Get("kvs", "id_prefix") 66 if err != nil { 67 return nil, err 68 } 69 return iter, nil 70 } 71 72 // Tombstones is used to pull all the tombstones from the graveyard. 73 func (s *Snapshot) Tombstones() (memdb.ResultIterator, error) { 74 return s.store.kvsGraveyard.DumpTxn(s.tx) 75 } 76 77 // KVS is used when restoring from a snapshot. Use KVSSet for general inserts. 78 func (s *Restore) KVS(entry *structs.DirEntry) error { 79 if err := s.tx.Insert("kvs", entry); err != nil { 80 return fmt.Errorf("failed inserting kvs entry: %s", err) 81 } 82 83 if err := indexUpdateMaxTxn(s.tx, entry.ModifyIndex, "kvs"); err != nil { 84 return fmt.Errorf("failed updating index: %s", err) 85 } 86 return nil 87 } 88 89 // Tombstone is used when restoring from a snapshot. For general inserts, use 90 // Graveyard.InsertTxn. 91 func (s *Restore) Tombstone(stone *Tombstone) error { 92 if err := s.store.kvsGraveyard.RestoreTxn(s.tx, stone); err != nil { 93 return fmt.Errorf("failed restoring tombstone: %s", err) 94 } 95 return nil 96 } 97 98 // ReapTombstones is used to delete all the tombstones with an index 99 // less than or equal to the given index. This is used to prevent 100 // unbounded storage growth of the tombstones. 101 func (s *Store) ReapTombstones(index uint64) error { 102 tx := s.db.Txn(true) 103 defer tx.Abort() 104 105 if err := s.kvsGraveyard.ReapTxn(tx, index); err != nil { 106 return fmt.Errorf("failed to reap kvs tombstones: %s", err) 107 } 108 109 tx.Commit() 110 return nil 111 } 112 113 // KVSSet is used to store a key/value pair. 114 func (s *Store) KVSSet(idx uint64, entry *structs.DirEntry) error { 115 tx := s.db.Txn(true) 116 defer tx.Abort() 117 118 // Perform the actual set. 119 if err := s.kvsSetTxn(tx, idx, entry, false); err != nil { 120 return err 121 } 122 123 tx.Commit() 124 return nil 125 } 126 127 // kvsSetTxn is used to insert or update a key/value pair in the state 128 // store. It is the inner method used and handles only the actual storage. 129 // If updateSession is true, then the incoming entry will set the new 130 // session (should be validated before calling this). Otherwise, we will keep 131 // whatever the existing session is. 132 func (s *Store) kvsSetTxn(tx *memdb.Txn, idx uint64, entry *structs.DirEntry, updateSession bool) error { 133 // Retrieve an existing KV pair 134 existing, err := tx.First("kvs", "id", entry.Key) 135 if err != nil { 136 return fmt.Errorf("failed kvs lookup: %s", err) 137 } 138 139 // Set the indexes. 140 if existing != nil { 141 entry.CreateIndex = existing.(*structs.DirEntry).CreateIndex 142 } else { 143 entry.CreateIndex = idx 144 } 145 entry.ModifyIndex = idx 146 147 // Preserve the existing session unless told otherwise. The "existing" 148 // session for a new entry is "no session". 149 if !updateSession { 150 if existing != nil { 151 entry.Session = existing.(*structs.DirEntry).Session 152 } else { 153 entry.Session = "" 154 } 155 } 156 157 // Store the kv pair in the state store and update the index. 158 if err := tx.Insert("kvs", entry); err != nil { 159 return fmt.Errorf("failed inserting kvs entry: %s", err) 160 } 161 if err := tx.Insert("index", &IndexEntry{"kvs", idx}); err != nil { 162 return fmt.Errorf("failed updating index: %s", err) 163 } 164 165 return nil 166 } 167 168 // KVSGet is used to retrieve a key/value pair from the state store. 169 func (s *Store) KVSGet(ws memdb.WatchSet, key string) (uint64, *structs.DirEntry, error) { 170 tx := s.db.Txn(false) 171 defer tx.Abort() 172 173 return s.kvsGetTxn(tx, ws, key) 174 } 175 176 // kvsGetTxn is the inner method that gets a KVS entry inside an existing 177 // transaction. 178 func (s *Store) kvsGetTxn(tx *memdb.Txn, ws memdb.WatchSet, key string) (uint64, *structs.DirEntry, error) { 179 // Get the table index. 180 idx := maxIndexTxn(tx, "kvs", "tombstones") 181 182 // Retrieve the key. 183 watchCh, entry, err := tx.FirstWatch("kvs", "id", key) 184 if err != nil { 185 return 0, nil, fmt.Errorf("failed kvs lookup: %s", err) 186 } 187 ws.Add(watchCh) 188 if entry != nil { 189 return idx, entry.(*structs.DirEntry), nil 190 } 191 return idx, nil, nil 192 } 193 194 // KVSList is used to list out all keys under a given prefix. If the 195 // prefix is left empty, all keys in the KVS will be returned. The returned 196 // is the max index of the returned kvs entries or applicable tombstones, or 197 // else it's the full table indexes for kvs and tombstones. 198 func (s *Store) KVSList(ws memdb.WatchSet, prefix string) (uint64, structs.DirEntries, error) { 199 tx := s.db.Txn(false) 200 defer tx.Abort() 201 202 return s.kvsListTxn(tx, ws, prefix) 203 } 204 205 // kvsListTxn is the inner method that gets a list of KVS entries matching a 206 // prefix. 207 func (s *Store) kvsListTxn(tx *memdb.Txn, ws memdb.WatchSet, prefix string) (uint64, structs.DirEntries, error) { 208 // Get the table indexes. 209 idx := maxIndexTxn(tx, "kvs", "tombstones") 210 211 // Query the prefix and list the available keys 212 entries, err := tx.Get("kvs", "id_prefix", prefix) 213 if err != nil { 214 return 0, nil, fmt.Errorf("failed kvs lookup: %s", err) 215 } 216 ws.Add(entries.WatchCh()) 217 218 // Gather all of the keys found in the store 219 var ents structs.DirEntries 220 var lindex uint64 221 for entry := entries.Next(); entry != nil; entry = entries.Next() { 222 e := entry.(*structs.DirEntry) 223 ents = append(ents, e) 224 if e.ModifyIndex > lindex { 225 lindex = e.ModifyIndex 226 } 227 } 228 229 // Check for the highest index in the graveyard. If the prefix is empty 230 // then just use the full table indexes since we are listing everything. 231 if prefix != "" { 232 gindex, err := s.kvsGraveyard.GetMaxIndexTxn(tx, prefix) 233 if err != nil { 234 return 0, nil, fmt.Errorf("failed graveyard lookup: %s", err) 235 } 236 if gindex > lindex { 237 lindex = gindex 238 } 239 } else { 240 lindex = idx 241 } 242 243 // Use the sub index if it was set and there are entries, otherwise use 244 // the full table index from above. 245 if lindex != 0 { 246 idx = lindex 247 } 248 return idx, ents, nil 249 } 250 251 // KVSListKeys is used to query the KV store for keys matching the given prefix. 252 // An optional separator may be specified, which can be used to slice off a part 253 // of the response so that only a subset of the prefix is returned. In this 254 // mode, the keys which are omitted are still counted in the returned index. 255 func (s *Store) KVSListKeys(ws memdb.WatchSet, prefix, sep string) (uint64, []string, error) { 256 tx := s.db.Txn(false) 257 defer tx.Abort() 258 259 // Get the table indexes. 260 idx := maxIndexTxn(tx, "kvs", "tombstones") 261 262 // Fetch keys using the specified prefix 263 entries, err := tx.Get("kvs", "id_prefix", prefix) 264 if err != nil { 265 return 0, nil, fmt.Errorf("failed kvs lookup: %s", err) 266 } 267 ws.Add(entries.WatchCh()) 268 269 prefixLen := len(prefix) 270 sepLen := len(sep) 271 272 var keys []string 273 var lindex uint64 274 var last string 275 for entry := entries.Next(); entry != nil; entry = entries.Next() { 276 e := entry.(*structs.DirEntry) 277 278 // Accumulate the high index 279 if e.ModifyIndex > lindex { 280 lindex = e.ModifyIndex 281 } 282 283 // Always accumulate if no separator provided 284 if sepLen == 0 { 285 keys = append(keys, e.Key) 286 continue 287 } 288 289 // Parse and de-duplicate the returned keys based on the 290 // key separator, if provided. 291 after := e.Key[prefixLen:] 292 sepIdx := strings.Index(after, sep) 293 if sepIdx > -1 { 294 key := e.Key[:prefixLen+sepIdx+sepLen] 295 if key != last { 296 keys = append(keys, key) 297 last = key 298 } 299 } else { 300 keys = append(keys, e.Key) 301 } 302 } 303 304 // Check for the highest index in the graveyard. If the prefix is empty 305 // then just use the full table indexes since we are listing everything. 306 if prefix != "" { 307 gindex, err := s.kvsGraveyard.GetMaxIndexTxn(tx, prefix) 308 if err != nil { 309 return 0, nil, fmt.Errorf("failed graveyard lookup: %s", err) 310 } 311 if gindex > lindex { 312 lindex = gindex 313 } 314 } else { 315 lindex = idx 316 } 317 318 // Use the sub index if it was set and there are entries, otherwise use 319 // the full table index from above. 320 if lindex != 0 { 321 idx = lindex 322 } 323 return idx, keys, nil 324 } 325 326 // KVSDelete is used to perform a shallow delete on a single key in the 327 // the state store. 328 func (s *Store) KVSDelete(idx uint64, key string) error { 329 tx := s.db.Txn(true) 330 defer tx.Abort() 331 332 // Perform the actual delete 333 if err := s.kvsDeleteTxn(tx, idx, key); err != nil { 334 return err 335 } 336 337 tx.Commit() 338 return nil 339 } 340 341 // kvsDeleteTxn is the inner method used to perform the actual deletion 342 // of a key/value pair within an existing transaction. 343 func (s *Store) kvsDeleteTxn(tx *memdb.Txn, idx uint64, key string) error { 344 // Look up the entry in the state store. 345 entry, err := tx.First("kvs", "id", key) 346 if err != nil { 347 return fmt.Errorf("failed kvs lookup: %s", err) 348 } 349 if entry == nil { 350 return nil 351 } 352 353 // Create a tombstone. 354 if err := s.kvsGraveyard.InsertTxn(tx, key, idx); err != nil { 355 return fmt.Errorf("failed adding to graveyard: %s", err) 356 } 357 358 // Delete the entry and update the index. 359 if err := tx.Delete("kvs", entry); err != nil { 360 return fmt.Errorf("failed deleting kvs entry: %s", err) 361 } 362 if err := tx.Insert("index", &IndexEntry{"kvs", idx}); err != nil { 363 return fmt.Errorf("failed updating index: %s", err) 364 } 365 366 return nil 367 } 368 369 // KVSDeleteCAS is used to try doing a KV delete operation with a given 370 // raft index. If the CAS index specified is not equal to the last 371 // observed index for the given key, then the call is a noop, otherwise 372 // a normal KV delete is invoked. 373 func (s *Store) KVSDeleteCAS(idx, cidx uint64, key string) (bool, error) { 374 tx := s.db.Txn(true) 375 defer tx.Abort() 376 377 set, err := s.kvsDeleteCASTxn(tx, idx, cidx, key) 378 if !set || err != nil { 379 return false, err 380 } 381 382 tx.Commit() 383 return true, nil 384 } 385 386 // kvsDeleteCASTxn is the inner method that does a CAS delete within an existing 387 // transaction. 388 func (s *Store) kvsDeleteCASTxn(tx *memdb.Txn, idx, cidx uint64, key string) (bool, error) { 389 // Retrieve the existing kvs entry, if any exists. 390 entry, err := tx.First("kvs", "id", key) 391 if err != nil { 392 return false, fmt.Errorf("failed kvs lookup: %s", err) 393 } 394 395 // If the existing index does not match the provided CAS 396 // index arg, then we shouldn't update anything and can safely 397 // return early here. 398 e, ok := entry.(*structs.DirEntry) 399 if !ok || e.ModifyIndex != cidx { 400 return entry == nil, nil 401 } 402 403 // Call the actual deletion if the above passed. 404 if err := s.kvsDeleteTxn(tx, idx, key); err != nil { 405 return false, err 406 } 407 return true, nil 408 } 409 410 // KVSSetCAS is used to do a check-and-set operation on a KV entry. The 411 // ModifyIndex in the provided entry is used to determine if we should 412 // write the entry to the state store or bail. Returns a bool indicating 413 // if a write happened and any error. 414 func (s *Store) KVSSetCAS(idx uint64, entry *structs.DirEntry) (bool, error) { 415 tx := s.db.Txn(true) 416 defer tx.Abort() 417 418 set, err := s.kvsSetCASTxn(tx, idx, entry) 419 if !set || err != nil { 420 return false, err 421 } 422 423 tx.Commit() 424 return true, nil 425 } 426 427 // kvsSetCASTxn is the inner method used to do a CAS inside an existing 428 // transaction. 429 func (s *Store) kvsSetCASTxn(tx *memdb.Txn, idx uint64, entry *structs.DirEntry) (bool, error) { 430 // Retrieve the existing entry. 431 existing, err := tx.First("kvs", "id", entry.Key) 432 if err != nil { 433 return false, fmt.Errorf("failed kvs lookup: %s", err) 434 } 435 436 // Check if the we should do the set. A ModifyIndex of 0 means that 437 // we are doing a set-if-not-exists. 438 if entry.ModifyIndex == 0 && existing != nil { 439 return false, nil 440 } 441 if entry.ModifyIndex != 0 && existing == nil { 442 return false, nil 443 } 444 e, ok := existing.(*structs.DirEntry) 445 if ok && entry.ModifyIndex != 0 && entry.ModifyIndex != e.ModifyIndex { 446 return false, nil 447 } 448 449 // If we made it this far, we should perform the set. 450 if err := s.kvsSetTxn(tx, idx, entry, false); err != nil { 451 return false, err 452 } 453 return true, nil 454 } 455 456 // KVSDeleteTree is used to do a recursive delete on a key prefix 457 // in the state store. If any keys are modified, the last index is 458 // set, otherwise this is a no-op. 459 func (s *Store) KVSDeleteTree(idx uint64, prefix string) error { 460 tx := s.db.Txn(true) 461 defer tx.Abort() 462 463 if err := s.kvsDeleteTreeTxn(tx, idx, prefix); err != nil { 464 return err 465 } 466 467 tx.Commit() 468 return nil 469 } 470 471 // kvsDeleteTreeTxn is the inner method that does a recursive delete inside an 472 // existing transaction. 473 func (s *Store) kvsDeleteTreeTxn(tx *memdb.Txn, idx uint64, prefix string) error { 474 475 // For prefix deletes, only insert one tombstone and delete the entire subtree 476 477 deleted, err := tx.DeletePrefix("kvs", "id_prefix", prefix) 478 479 if err != nil { 480 return fmt.Errorf("failed recursive deleting kvs entry: %s", err) 481 } 482 483 if deleted { 484 if prefix != "" { // don't insert a tombstone if the entire tree is deleted, all watchers on keys will see the max_index of the tree 485 if err := s.kvsGraveyard.InsertTxn(tx, prefix, idx); err != nil { 486 return fmt.Errorf("failed adding to graveyard: %s", err) 487 } 488 } 489 if err := tx.Insert("index", &IndexEntry{"kvs", idx}); err != nil { 490 return fmt.Errorf("failed updating index: %s", err) 491 } 492 } 493 return nil 494 } 495 496 // KVSLockDelay returns the expiration time for any lock delay associated with 497 // the given key. 498 func (s *Store) KVSLockDelay(key string) time.Time { 499 return s.lockDelay.GetExpiration(key) 500 } 501 502 // KVSLock is similar to KVSSet but only performs the set if the lock can be 503 // acquired. 504 func (s *Store) KVSLock(idx uint64, entry *structs.DirEntry) (bool, error) { 505 tx := s.db.Txn(true) 506 defer tx.Abort() 507 508 locked, err := s.kvsLockTxn(tx, idx, entry) 509 if !locked || err != nil { 510 return false, err 511 } 512 513 tx.Commit() 514 return true, nil 515 } 516 517 // kvsLockTxn is the inner method that does a lock inside an existing 518 // transaction. 519 func (s *Store) kvsLockTxn(tx *memdb.Txn, idx uint64, entry *structs.DirEntry) (bool, error) { 520 // Verify that a session is present. 521 if entry.Session == "" { 522 return false, fmt.Errorf("missing session") 523 } 524 525 // Verify that the session exists. 526 sess, err := tx.First("sessions", "id", entry.Session) 527 if err != nil { 528 return false, fmt.Errorf("failed session lookup: %s", err) 529 } 530 if sess == nil { 531 return false, fmt.Errorf("invalid session %#v", entry.Session) 532 } 533 534 // Retrieve the existing entry. 535 existing, err := tx.First("kvs", "id", entry.Key) 536 if err != nil { 537 return false, fmt.Errorf("failed kvs lookup: %s", err) 538 } 539 540 // Set up the entry, using the existing entry if present. 541 if existing != nil { 542 e := existing.(*structs.DirEntry) 543 if e.Session == entry.Session { 544 // We already hold this lock, good to go. 545 entry.CreateIndex = e.CreateIndex 546 entry.LockIndex = e.LockIndex 547 } else if e.Session != "" { 548 // Bail out, someone else holds this lock. 549 return false, nil 550 } else { 551 // Set up a new lock with this session. 552 entry.CreateIndex = e.CreateIndex 553 entry.LockIndex = e.LockIndex + 1 554 } 555 } else { 556 entry.CreateIndex = idx 557 entry.LockIndex = 1 558 } 559 entry.ModifyIndex = idx 560 561 // If we made it this far, we should perform the set. 562 if err := s.kvsSetTxn(tx, idx, entry, true); err != nil { 563 return false, err 564 } 565 return true, nil 566 } 567 568 // KVSUnlock is similar to KVSSet but only performs the set if the lock can be 569 // unlocked (the key must already exist and be locked). 570 func (s *Store) KVSUnlock(idx uint64, entry *structs.DirEntry) (bool, error) { 571 tx := s.db.Txn(true) 572 defer tx.Abort() 573 574 unlocked, err := s.kvsUnlockTxn(tx, idx, entry) 575 if !unlocked || err != nil { 576 return false, err 577 } 578 579 tx.Commit() 580 return true, nil 581 } 582 583 // kvsUnlockTxn is the inner method that does an unlock inside an existing 584 // transaction. 585 func (s *Store) kvsUnlockTxn(tx *memdb.Txn, idx uint64, entry *structs.DirEntry) (bool, error) { 586 // Verify that a session is present. 587 if entry.Session == "" { 588 return false, fmt.Errorf("missing session") 589 } 590 591 // Retrieve the existing entry. 592 existing, err := tx.First("kvs", "id", entry.Key) 593 if err != nil { 594 return false, fmt.Errorf("failed kvs lookup: %s", err) 595 } 596 597 // Bail if there's no existing key. 598 if existing == nil { 599 return false, nil 600 } 601 602 // Make sure the given session is the lock holder. 603 e := existing.(*structs.DirEntry) 604 if e.Session != entry.Session { 605 return false, nil 606 } 607 608 // Clear the lock and update the entry. 609 entry.Session = "" 610 entry.LockIndex = e.LockIndex 611 entry.CreateIndex = e.CreateIndex 612 entry.ModifyIndex = idx 613 614 // If we made it this far, we should perform the set. 615 if err := s.kvsSetTxn(tx, idx, entry, true); err != nil { 616 return false, err 617 } 618 return true, nil 619 } 620 621 // kvsCheckSessionTxn checks to see if the given session matches the current 622 // entry for a key. 623 func (s *Store) kvsCheckSessionTxn(tx *memdb.Txn, key string, session string) (*structs.DirEntry, error) { 624 entry, err := tx.First("kvs", "id", key) 625 if err != nil { 626 return nil, fmt.Errorf("failed kvs lookup: %s", err) 627 } 628 if entry == nil { 629 return nil, fmt.Errorf("failed to check session, key %q doesn't exist", key) 630 } 631 632 e := entry.(*structs.DirEntry) 633 if e.Session != session { 634 return nil, fmt.Errorf("failed session check for key %q, current session %q != %q", key, e.Session, session) 635 } 636 637 return e, nil 638 } 639 640 // kvsCheckIndexTxn checks to see if the given modify index matches the current 641 // entry for a key. 642 func (s *Store) kvsCheckIndexTxn(tx *memdb.Txn, key string, cidx uint64) (*structs.DirEntry, error) { 643 entry, err := tx.First("kvs", "id", key) 644 if err != nil { 645 return nil, fmt.Errorf("failed kvs lookup: %s", err) 646 } 647 if entry == nil { 648 return nil, fmt.Errorf("failed to check index, key %q doesn't exist", key) 649 } 650 651 e := entry.(*structs.DirEntry) 652 if e.ModifyIndex != cidx { 653 return nil, fmt.Errorf("failed index check for key %q, current modify index %d != %d", key, e.ModifyIndex, cidx) 654 } 655 656 return e, nil 657 }