github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/state/state_database.go (about) 1 package state 2 3 import ( 4 "fmt" 5 "os" 6 "path/filepath" 7 "time" 8 9 "github.com/boltdb/bolt" 10 11 hclog "github.com/hashicorp/go-hclog" 12 trstate "github.com/hashicorp/nomad/client/allocrunner/taskrunner/state" 13 dmstate "github.com/hashicorp/nomad/client/devicemanager/state" 14 "github.com/hashicorp/nomad/client/dynamicplugins" 15 driverstate "github.com/hashicorp/nomad/client/pluginmanager/drivermanager/state" 16 "github.com/hashicorp/nomad/helper/boltdd" 17 "github.com/hashicorp/nomad/nomad/structs" 18 ) 19 20 /* 21 The client has a boltDB backed state store. The schema as of 0.9 looks as follows: 22 23 meta/ 24 |--> version -> '2' (not msgpack encoded) 25 |--> upgraded -> time.Now().Format(timeRFC3339) 26 allocations/ 27 |--> <alloc-id>/ 28 |--> alloc -> allocEntry{*structs.Allocation} 29 |--> deploy_status -> deployStatusEntry{*structs.AllocDeploymentStatus} 30 |--> task-<name>/ 31 |--> local_state -> *trstate.LocalState # Local-only state 32 |--> task_state -> *structs.TaskState # Sync'd to servers 33 34 devicemanager/ 35 |--> plugin_state -> *dmstate.PluginState 36 37 drivermanager/ 38 |--> plugin_state -> *driverstate.PluginState 39 40 dynamicplugins/ 41 |--> registry_state -> *dynamicplugins.RegistryState 42 */ 43 44 var ( 45 // metaBucketName is the name of the metadata bucket 46 metaBucketName = []byte("meta") 47 48 // metaVersionKey is the key the state schema version is stored under. 49 metaVersionKey = []byte("version") 50 51 // metaVersion is the value of the state schema version to detect when 52 // an upgrade is needed. It skips the usual boltdd/msgpack backend to 53 // be as portable and futureproof as possible. 54 metaVersion = []byte{'2'} 55 56 // metaUpgradedKey is the key that stores the timestamp of the last 57 // time the schema was upgraded. 58 metaUpgradedKey = []byte("upgraded") 59 60 // allocationsBucketName is the bucket name containing all allocation related 61 // data 62 allocationsBucketName = []byte("allocations") 63 64 // allocKey is the key Allocations are stored under encapsulated in 65 // allocEntry structs. 66 allocKey = []byte("alloc") 67 68 // allocDeployStatusKey is the key *structs.AllocDeploymentStatus is 69 // stored under. 70 allocDeployStatusKey = []byte("deploy_status") 71 72 // allocations -> $allocid -> task-$taskname -> the keys below 73 taskLocalStateKey = []byte("local_state") 74 taskStateKey = []byte("task_state") 75 76 // devManagerBucket is the bucket name containing all device manager related 77 // data 78 devManagerBucket = []byte("devicemanager") 79 80 // driverManagerBucket is the bucket name containing all driver manager 81 // related data 82 driverManagerBucket = []byte("drivermanager") 83 84 // managerPluginStateKey is the key by which plugin manager plugin state is 85 // stored at 86 managerPluginStateKey = []byte("plugin_state") 87 88 // dynamicPluginBucket is the bucket name containing all dynamic plugin 89 // registry data. each dynamic plugin registry will have its own subbucket. 90 dynamicPluginBucket = []byte("dynamicplugins") 91 92 // registryStateKey is the key at which dynamic plugin registry state is stored 93 registryStateKey = []byte("registry_state") 94 ) 95 96 // taskBucketName returns the bucket name for the given task name. 97 func taskBucketName(taskName string) []byte { 98 return []byte("task-" + taskName) 99 } 100 101 // NewStateDBFunc creates a StateDB given a state directory. 102 type NewStateDBFunc func(logger hclog.Logger, stateDir string) (StateDB, error) 103 104 // GetStateDBFactory returns a func for creating a StateDB 105 func GetStateDBFactory(devMode bool) NewStateDBFunc { 106 // Return a noop state db implementation when in debug mode 107 if devMode { 108 return func(hclog.Logger, string) (StateDB, error) { 109 return NoopDB{}, nil 110 } 111 } 112 113 return NewBoltStateDB 114 } 115 116 // BoltStateDB persists and restores Nomad client state in a boltdb. All 117 // methods are safe for concurrent access. 118 type BoltStateDB struct { 119 stateDir string 120 db *boltdd.DB 121 logger hclog.Logger 122 } 123 124 // NewBoltStateDB creates or opens an existing boltdb state file or returns an 125 // error. 126 func NewBoltStateDB(logger hclog.Logger, stateDir string) (StateDB, error) { 127 fn := filepath.Join(stateDir, "state.db") 128 129 // Check to see if the DB already exists 130 fi, err := os.Stat(fn) 131 if err != nil && !os.IsNotExist(err) { 132 return nil, err 133 } 134 firstRun := fi == nil 135 136 // Timeout to force failure when accessing a data dir that is already in use 137 timeout := &bolt.Options{Timeout: 5 * time.Second} 138 139 // Create or open the boltdb state database 140 db, err := boltdd.Open(fn, 0600, timeout) 141 if err == bolt.ErrTimeout { 142 return nil, fmt.Errorf("timed out while opening database, is another Nomad process accessing data_dir %s?", stateDir) 143 } else if err != nil { 144 return nil, fmt.Errorf("failed to create state database: %v", err) 145 } 146 147 sdb := &BoltStateDB{ 148 stateDir: stateDir, 149 db: db, 150 logger: logger, 151 } 152 153 // If db did not already exist, initialize metadata fields 154 if firstRun { 155 if err := sdb.init(); err != nil { 156 return nil, err 157 } 158 } 159 160 return sdb, nil 161 } 162 163 func (s *BoltStateDB) Name() string { 164 return "boltdb" 165 } 166 167 // GetAllAllocations gets all allocations persisted by this client and returns 168 // a map of alloc ids to errors for any allocations that could not be restored. 169 // 170 // If a fatal error was encountered it will be returned and the other two 171 // values will be nil. 172 func (s *BoltStateDB) GetAllAllocations() ([]*structs.Allocation, map[string]error, error) { 173 var allocs []*structs.Allocation 174 var errs map[string]error 175 err := s.db.View(func(tx *boltdd.Tx) error { 176 allocs, errs = s.getAllAllocations(tx) 177 return nil 178 }) 179 180 // db.View itself may return an error, so still check 181 if err != nil { 182 return nil, nil, err 183 } 184 185 return allocs, errs, nil 186 } 187 188 // allocEntry wraps values in the Allocations buckets 189 type allocEntry struct { 190 Alloc *structs.Allocation 191 } 192 193 func (s *BoltStateDB) getAllAllocations(tx *boltdd.Tx) ([]*structs.Allocation, map[string]error) { 194 allocs := []*structs.Allocation{} 195 errs := map[string]error{} 196 197 allocationsBkt := tx.Bucket(allocationsBucketName) 198 if allocationsBkt == nil { 199 // No allocs 200 return allocs, errs 201 } 202 203 // Create a cursor for iteration. 204 c := allocationsBkt.BoltBucket().Cursor() 205 206 // Iterate over all the allocation buckets 207 for k, _ := c.First(); k != nil; k, _ = c.Next() { 208 allocID := string(k) 209 allocBkt := allocationsBkt.Bucket(k) 210 if allocBkt == nil { 211 errs[allocID] = fmt.Errorf("missing alloc bucket") 212 continue 213 } 214 215 var ae allocEntry 216 if err := allocBkt.Get(allocKey, &ae); err != nil { 217 errs[allocID] = fmt.Errorf("failed to decode alloc: %v", err) 218 continue 219 } 220 221 // Handle upgrade path 222 ae.Alloc.Canonicalize() 223 ae.Alloc.Job.Canonicalize() 224 225 allocs = append(allocs, ae.Alloc) 226 } 227 228 return allocs, errs 229 } 230 231 // PutAllocation stores an allocation or returns an error. 232 func (s *BoltStateDB) PutAllocation(alloc *structs.Allocation) error { 233 return s.db.Update(func(tx *boltdd.Tx) error { 234 // Retrieve the root allocations bucket 235 allocsBkt, err := tx.CreateBucketIfNotExists(allocationsBucketName) 236 if err != nil { 237 return err 238 } 239 240 // Retrieve the specific allocations bucket 241 key := []byte(alloc.ID) 242 allocBkt, err := allocsBkt.CreateBucketIfNotExists(key) 243 if err != nil { 244 return err 245 } 246 247 allocState := allocEntry{ 248 Alloc: alloc, 249 } 250 return allocBkt.Put(allocKey, &allocState) 251 }) 252 } 253 254 // deployStatusEntry wraps values for DeploymentStatus keys. 255 type deployStatusEntry struct { 256 DeploymentStatus *structs.AllocDeploymentStatus 257 } 258 259 // PutDeploymentStatus stores an allocation's DeploymentStatus or returns an 260 // error. 261 func (s *BoltStateDB) PutDeploymentStatus(allocID string, ds *structs.AllocDeploymentStatus) error { 262 return s.db.Update(func(tx *boltdd.Tx) error { 263 return putDeploymentStatusImpl(tx, allocID, ds) 264 }) 265 } 266 267 func putDeploymentStatusImpl(tx *boltdd.Tx, allocID string, ds *structs.AllocDeploymentStatus) error { 268 allocBkt, err := getAllocationBucket(tx, allocID) 269 if err != nil { 270 return err 271 } 272 273 entry := deployStatusEntry{ 274 DeploymentStatus: ds, 275 } 276 return allocBkt.Put(allocDeployStatusKey, &entry) 277 } 278 279 // GetDeploymentStatus retrieves an allocation's DeploymentStatus or returns an 280 // error. 281 func (s *BoltStateDB) GetDeploymentStatus(allocID string) (*structs.AllocDeploymentStatus, error) { 282 var entry deployStatusEntry 283 284 err := s.db.View(func(tx *boltdd.Tx) error { 285 allAllocsBkt := tx.Bucket(allocationsBucketName) 286 if allAllocsBkt == nil { 287 // No state, return 288 return nil 289 } 290 291 allocBkt := allAllocsBkt.Bucket([]byte(allocID)) 292 if allocBkt == nil { 293 // No state for alloc, return 294 return nil 295 } 296 297 return allocBkt.Get(allocDeployStatusKey, &entry) 298 }) 299 300 // It's valid for this field to be nil/missing 301 if boltdd.IsErrNotFound(err) { 302 return nil, nil 303 } 304 305 if err != nil { 306 return nil, err 307 } 308 309 return entry.DeploymentStatus, nil 310 } 311 312 // GetTaskRunnerState returns the LocalState and TaskState for a 313 // TaskRunner. LocalState or TaskState will be nil if they do not exist. 314 // 315 // If an error is encountered both LocalState and TaskState will be nil. 316 func (s *BoltStateDB) GetTaskRunnerState(allocID, taskName string) (*trstate.LocalState, *structs.TaskState, error) { 317 var ls *trstate.LocalState 318 var ts *structs.TaskState 319 320 err := s.db.View(func(tx *boltdd.Tx) error { 321 allAllocsBkt := tx.Bucket(allocationsBucketName) 322 if allAllocsBkt == nil { 323 // No state, return 324 return nil 325 } 326 327 allocBkt := allAllocsBkt.Bucket([]byte(allocID)) 328 if allocBkt == nil { 329 // No state for alloc, return 330 return nil 331 } 332 333 taskBkt := allocBkt.Bucket(taskBucketName(taskName)) 334 if taskBkt == nil { 335 // No state for task, return 336 return nil 337 } 338 339 // Restore Local State if it exists 340 ls = &trstate.LocalState{} 341 if err := taskBkt.Get(taskLocalStateKey, ls); err != nil { 342 if !boltdd.IsErrNotFound(err) { 343 return fmt.Errorf("failed to read local task runner state: %v", err) 344 } 345 346 // Key not found, reset ls to nil 347 ls = nil 348 } 349 350 // Restore Task State if it exists 351 ts = &structs.TaskState{} 352 if err := taskBkt.Get(taskStateKey, ts); err != nil { 353 if !boltdd.IsErrNotFound(err) { 354 return fmt.Errorf("failed to read task state: %v", err) 355 } 356 357 // Key not found, reset ts to nil 358 ts = nil 359 } 360 361 return nil 362 }) 363 364 if err != nil { 365 return nil, nil, err 366 } 367 368 return ls, ts, nil 369 } 370 371 // PutTaskRunnerLocalState stores TaskRunner's LocalState or returns an error. 372 func (s *BoltStateDB) PutTaskRunnerLocalState(allocID, taskName string, val *trstate.LocalState) error { 373 return s.db.Update(func(tx *boltdd.Tx) error { 374 return putTaskRunnerLocalStateImpl(tx, allocID, taskName, val) 375 }) 376 } 377 378 // putTaskRunnerLocalStateImpl stores TaskRunner's LocalState in an ongoing 379 // transaction or returns an error. 380 func putTaskRunnerLocalStateImpl(tx *boltdd.Tx, allocID, taskName string, val *trstate.LocalState) error { 381 taskBkt, err := getTaskBucket(tx, allocID, taskName) 382 if err != nil { 383 return fmt.Errorf("failed to retrieve allocation bucket: %v", err) 384 } 385 386 if err := taskBkt.Put(taskLocalStateKey, val); err != nil { 387 return fmt.Errorf("failed to write task_runner state: %v", err) 388 } 389 390 return nil 391 } 392 393 // PutTaskState stores a task's state or returns an error. 394 func (s *BoltStateDB) PutTaskState(allocID, taskName string, state *structs.TaskState) error { 395 return s.db.Update(func(tx *boltdd.Tx) error { 396 return putTaskStateImpl(tx, allocID, taskName, state) 397 }) 398 } 399 400 // putTaskStateImpl stores a task's state in an ongoing transaction or returns 401 // an error. 402 func putTaskStateImpl(tx *boltdd.Tx, allocID, taskName string, state *structs.TaskState) error { 403 taskBkt, err := getTaskBucket(tx, allocID, taskName) 404 if err != nil { 405 return fmt.Errorf("failed to retrieve allocation bucket: %v", err) 406 } 407 408 return taskBkt.Put(taskStateKey, state) 409 } 410 411 // DeleteTaskBucket is used to delete a task bucket if it exists. 412 func (s *BoltStateDB) DeleteTaskBucket(allocID, taskName string) error { 413 return s.db.Update(func(tx *boltdd.Tx) error { 414 // Retrieve the root allocations bucket 415 allocations := tx.Bucket(allocationsBucketName) 416 if allocations == nil { 417 return nil 418 } 419 420 // Retrieve the specific allocations bucket 421 alloc := allocations.Bucket([]byte(allocID)) 422 if alloc == nil { 423 return nil 424 } 425 426 // Check if the bucket exists 427 key := taskBucketName(taskName) 428 return alloc.DeleteBucket(key) 429 }) 430 } 431 432 // DeleteAllocationBucket is used to delete an allocation bucket if it exists. 433 func (s *BoltStateDB) DeleteAllocationBucket(allocID string) error { 434 return s.db.Update(func(tx *boltdd.Tx) error { 435 // Retrieve the root allocations bucket 436 allocations := tx.Bucket(allocationsBucketName) 437 if allocations == nil { 438 return nil 439 } 440 441 key := []byte(allocID) 442 return allocations.DeleteBucket(key) 443 }) 444 } 445 446 // Close releases all database resources and unlocks the database file on disk. 447 // All transactions must be closed before closing the database. 448 func (s *BoltStateDB) Close() error { 449 return s.db.Close() 450 } 451 452 // getAllocationBucket returns the bucket used to persist state about a 453 // particular allocation. If the root allocation bucket or the specific 454 // allocation bucket doesn't exist, it will be created as long as the 455 // transaction is writable. 456 func getAllocationBucket(tx *boltdd.Tx, allocID string) (*boltdd.Bucket, error) { 457 var err error 458 w := tx.Writable() 459 460 // Retrieve the root allocations bucket 461 allocations := tx.Bucket(allocationsBucketName) 462 if allocations == nil { 463 if !w { 464 return nil, fmt.Errorf("Allocations bucket doesn't exist and transaction is not writable") 465 } 466 467 allocations, err = tx.CreateBucketIfNotExists(allocationsBucketName) 468 if err != nil { 469 return nil, err 470 } 471 } 472 473 // Retrieve the specific allocations bucket 474 key := []byte(allocID) 475 alloc := allocations.Bucket(key) 476 if alloc == nil { 477 if !w { 478 return nil, fmt.Errorf("Allocation bucket doesn't exist and transaction is not writable") 479 } 480 481 alloc, err = allocations.CreateBucket(key) 482 if err != nil { 483 return nil, err 484 } 485 } 486 487 return alloc, nil 488 } 489 490 // getTaskBucket returns the bucket used to persist state about a 491 // particular task. If the root allocation bucket, the specific 492 // allocation or task bucket doesn't exist, they will be created as long as the 493 // transaction is writable. 494 func getTaskBucket(tx *boltdd.Tx, allocID, taskName string) (*boltdd.Bucket, error) { 495 alloc, err := getAllocationBucket(tx, allocID) 496 if err != nil { 497 return nil, err 498 } 499 500 // Retrieve the specific task bucket 501 w := tx.Writable() 502 key := taskBucketName(taskName) 503 task := alloc.Bucket(key) 504 if task == nil { 505 if !w { 506 return nil, fmt.Errorf("Task bucket doesn't exist and transaction is not writable") 507 } 508 509 task, err = alloc.CreateBucket(key) 510 if err != nil { 511 return nil, err 512 } 513 } 514 515 return task, nil 516 } 517 518 // PutDevicePluginState stores the device manager's plugin state or returns an 519 // error. 520 func (s *BoltStateDB) PutDevicePluginState(ps *dmstate.PluginState) error { 521 return s.db.Update(func(tx *boltdd.Tx) error { 522 // Retrieve the root device manager bucket 523 devBkt, err := tx.CreateBucketIfNotExists(devManagerBucket) 524 if err != nil { 525 return err 526 } 527 528 return devBkt.Put(managerPluginStateKey, ps) 529 }) 530 } 531 532 // GetDevicePluginState stores the device manager's plugin state or returns an 533 // error. 534 func (s *BoltStateDB) GetDevicePluginState() (*dmstate.PluginState, error) { 535 var ps *dmstate.PluginState 536 537 err := s.db.View(func(tx *boltdd.Tx) error { 538 devBkt := tx.Bucket(devManagerBucket) 539 if devBkt == nil { 540 // No state, return 541 return nil 542 } 543 544 // Restore Plugin State if it exists 545 ps = &dmstate.PluginState{} 546 if err := devBkt.Get(managerPluginStateKey, ps); err != nil { 547 if !boltdd.IsErrNotFound(err) { 548 return fmt.Errorf("failed to read device manager plugin state: %v", err) 549 } 550 551 // Key not found, reset ps to nil 552 ps = nil 553 } 554 555 return nil 556 }) 557 558 if err != nil { 559 return nil, err 560 } 561 562 return ps, nil 563 } 564 565 // PutDriverPluginState stores the driver manager's plugin state or returns an 566 // error. 567 func (s *BoltStateDB) PutDriverPluginState(ps *driverstate.PluginState) error { 568 return s.db.Update(func(tx *boltdd.Tx) error { 569 // Retrieve the root driver manager bucket 570 driverBkt, err := tx.CreateBucketIfNotExists(driverManagerBucket) 571 if err != nil { 572 return err 573 } 574 575 return driverBkt.Put(managerPluginStateKey, ps) 576 }) 577 } 578 579 // GetDriverPluginState stores the driver manager's plugin state or returns an 580 // error. 581 func (s *BoltStateDB) GetDriverPluginState() (*driverstate.PluginState, error) { 582 var ps *driverstate.PluginState 583 584 err := s.db.View(func(tx *boltdd.Tx) error { 585 driverBkt := tx.Bucket(driverManagerBucket) 586 if driverBkt == nil { 587 // No state, return 588 return nil 589 } 590 591 // Restore Plugin State if it exists 592 ps = &driverstate.PluginState{} 593 if err := driverBkt.Get(managerPluginStateKey, ps); err != nil { 594 if !boltdd.IsErrNotFound(err) { 595 return fmt.Errorf("failed to read driver manager plugin state: %v", err) 596 } 597 598 // Key not found, reset ps to nil 599 ps = nil 600 } 601 602 return nil 603 }) 604 605 if err != nil { 606 return nil, err 607 } 608 609 return ps, nil 610 } 611 612 // PutDynamicPluginRegistryState stores the dynamic plugin registry's 613 // state or returns an error. 614 func (s *BoltStateDB) PutDynamicPluginRegistryState(ps *dynamicplugins.RegistryState) error { 615 return s.db.Update(func(tx *boltdd.Tx) error { 616 // Retrieve the root dynamic plugin manager bucket 617 dynamicBkt, err := tx.CreateBucketIfNotExists(dynamicPluginBucket) 618 if err != nil { 619 return err 620 } 621 return dynamicBkt.Put(registryStateKey, ps) 622 }) 623 } 624 625 // GetDynamicPluginRegistryState stores the dynamic plugin registry's 626 // registry state or returns an error. 627 func (s *BoltStateDB) GetDynamicPluginRegistryState() (*dynamicplugins.RegistryState, error) { 628 var ps *dynamicplugins.RegistryState 629 630 err := s.db.View(func(tx *boltdd.Tx) error { 631 dynamicBkt := tx.Bucket(dynamicPluginBucket) 632 if dynamicBkt == nil { 633 // No state, return 634 return nil 635 } 636 637 // Restore Plugin State if it exists 638 ps = &dynamicplugins.RegistryState{} 639 if err := dynamicBkt.Get(registryStateKey, ps); err != nil { 640 if !boltdd.IsErrNotFound(err) { 641 return fmt.Errorf("failed to read dynamic plugin registry state: %v", err) 642 } 643 644 // Key not found, reset ps to nil 645 ps = nil 646 } 647 648 return nil 649 }) 650 651 if err != nil { 652 return nil, err 653 } 654 655 return ps, nil 656 } 657 658 // init initializes metadata entries in a newly created state database. 659 func (s *BoltStateDB) init() error { 660 return s.db.Update(func(tx *boltdd.Tx) error { 661 return addMeta(tx.BoltTx()) 662 }) 663 } 664 665 // Upgrade bolt state db from 0.8 schema to 0.9 schema. Noop if already using 666 // 0.9 schema. Creates a backup before upgrading. 667 func (s *BoltStateDB) Upgrade() error { 668 // Check to see if the underlying DB needs upgrading. 669 upgrade, err := NeedsUpgrade(s.db.BoltDB()) 670 if err != nil { 671 return err 672 } 673 if !upgrade { 674 // No upgrade needed! 675 return nil 676 } 677 678 // Upgraded needed. Backup the boltdb first. 679 backupFileName := filepath.Join(s.stateDir, "state.db.backup") 680 if err := backupDB(s.db.BoltDB(), backupFileName); err != nil { 681 return fmt.Errorf("error backing up state db: %v", err) 682 } 683 684 // Perform the upgrade 685 if err := s.db.Update(func(tx *boltdd.Tx) error { 686 if err := UpgradeAllocs(s.logger, tx); err != nil { 687 return err 688 } 689 690 // Add standard metadata 691 if err := addMeta(tx.BoltTx()); err != nil { 692 return err 693 } 694 695 // Write the time the upgrade was done 696 bkt, err := tx.CreateBucketIfNotExists(metaBucketName) 697 if err != nil { 698 return err 699 } 700 return bkt.Put(metaUpgradedKey, time.Now().Format(time.RFC3339)) 701 }); err != nil { 702 return err 703 } 704 705 s.logger.Info("successfully upgraded state") 706 return nil 707 } 708 709 // DB allows access to the underlying BoltDB for testing purposes. 710 func (s *BoltStateDB) DB() *boltdd.DB { 711 return s.db 712 }