github.com/bigcommerce/nomad@v0.9.3-bc/client/state/state_database.go (about) 1 package state 2 3 import ( 4 "fmt" 5 "os" 6 "path/filepath" 7 "time" 8 9 hclog "github.com/hashicorp/go-hclog" 10 trstate "github.com/hashicorp/nomad/client/allocrunner/taskrunner/state" 11 dmstate "github.com/hashicorp/nomad/client/devicemanager/state" 12 driverstate "github.com/hashicorp/nomad/client/pluginmanager/drivermanager/state" 13 "github.com/hashicorp/nomad/helper/boltdd" 14 "github.com/hashicorp/nomad/nomad/structs" 15 ) 16 17 /* 18 The client has a boltDB backed state store. The schema as of 0.9 looks as follows: 19 20 meta/ 21 |--> version -> '2' (not msgpack encoded) 22 |--> upgraded -> time.Now().Format(timeRFC3339) 23 allocations/ 24 |--> <alloc-id>/ 25 |--> alloc -> allocEntry{*structs.Allocation} 26 |--> deploy_status -> deployStatusEntry{*structs.AllocDeploymentStatus} 27 |--> task-<name>/ 28 |--> local_state -> *trstate.LocalState # Local-only state 29 |--> task_state -> *structs.TaskState # Sync'd to servers 30 31 devicemanager/ 32 |--> plugin_state -> *dmstate.PluginState 33 34 drivermanager/ 35 |--> plugin_state -> *dmstate.PluginState 36 */ 37 38 var ( 39 // metaBucketName is the name of the metadata bucket 40 metaBucketName = []byte("meta") 41 42 // metaVersionKey is the key the state schema version is stored under. 43 metaVersionKey = []byte("version") 44 45 // metaVersion is the value of the state schema version to detect when 46 // an upgrade is needed. It skips the usual boltdd/msgpack backend to 47 // be as portable and futureproof as possible. 48 metaVersion = []byte{'2'} 49 50 // metaUpgradedKey is the key that stores the timestamp of the last 51 // time the schema was upgraded. 52 metaUpgradedKey = []byte("upgraded") 53 54 // allocationsBucketName is the bucket name containing all allocation related 55 // data 56 allocationsBucketName = []byte("allocations") 57 58 // allocKey is the key Allocations are stored under encapsulated in 59 // allocEntry structs. 60 allocKey = []byte("alloc") 61 62 // allocDeployStatusKey is the key *structs.AllocDeploymentStatus is 63 // stored under. 64 allocDeployStatusKey = []byte("deploy_status") 65 66 // allocations -> $allocid -> task-$taskname -> the keys below 67 taskLocalStateKey = []byte("local_state") 68 taskStateKey = []byte("task_state") 69 70 // devManagerBucket is the bucket name containing all device manager related 71 // data 72 devManagerBucket = []byte("devicemanager") 73 74 // driverManagerBucket is the bucket name container all driver manager 75 // related data 76 driverManagerBucket = []byte("drivermanager") 77 78 // managerPluginStateKey is the key by which plugin manager plugin state is 79 // stored at 80 managerPluginStateKey = []byte("plugin_state") 81 ) 82 83 // taskBucketName returns the bucket name for the given task name. 84 func taskBucketName(taskName string) []byte { 85 return []byte("task-" + taskName) 86 } 87 88 // NewStateDBFunc creates a StateDB given a state directory. 89 type NewStateDBFunc func(logger hclog.Logger, stateDir string) (StateDB, error) 90 91 // GetStateDBFactory returns a func for creating a StateDB 92 func GetStateDBFactory(devMode bool) NewStateDBFunc { 93 // Return a noop state db implementation when in debug mode 94 if devMode { 95 return func(hclog.Logger, string) (StateDB, error) { 96 return NoopDB{}, nil 97 } 98 } 99 100 return NewBoltStateDB 101 } 102 103 // BoltStateDB persists and restores Nomad client state in a boltdb. All 104 // methods are safe for concurrent access. 105 type BoltStateDB struct { 106 stateDir string 107 db *boltdd.DB 108 logger hclog.Logger 109 } 110 111 // NewBoltStateDB creates or opens an existing boltdb state file or returns an 112 // error. 113 func NewBoltStateDB(logger hclog.Logger, stateDir string) (StateDB, error) { 114 fn := filepath.Join(stateDir, "state.db") 115 116 // Check to see if the DB already exists 117 fi, err := os.Stat(fn) 118 if err != nil && !os.IsNotExist(err) { 119 return nil, err 120 } 121 firstRun := fi == nil 122 123 // Create or open the boltdb state database 124 db, err := boltdd.Open(fn, 0600, nil) 125 if err != nil { 126 return nil, fmt.Errorf("failed to create state database: %v", err) 127 } 128 129 sdb := &BoltStateDB{ 130 stateDir: stateDir, 131 db: db, 132 logger: logger, 133 } 134 135 // If db did not already exist, initialize metadata fields 136 if firstRun { 137 if err := sdb.init(); err != nil { 138 return nil, err 139 } 140 } 141 142 return sdb, nil 143 } 144 145 func (s *BoltStateDB) Name() string { 146 return "boltdb" 147 } 148 149 // GetAllAllocations gets all allocations persisted by this client and returns 150 // a map of alloc ids to errors for any allocations that could not be restored. 151 // 152 // If a fatal error was encountered it will be returned and the other two 153 // values will be nil. 154 func (s *BoltStateDB) GetAllAllocations() ([]*structs.Allocation, map[string]error, error) { 155 var allocs []*structs.Allocation 156 var errs map[string]error 157 err := s.db.View(func(tx *boltdd.Tx) error { 158 allocs, errs = s.getAllAllocations(tx) 159 return nil 160 }) 161 162 // db.View itself may return an error, so still check 163 if err != nil { 164 return nil, nil, err 165 } 166 167 return allocs, errs, nil 168 } 169 170 // allocEntry wraps values in the Allocations buckets 171 type allocEntry struct { 172 Alloc *structs.Allocation 173 } 174 175 func (s *BoltStateDB) getAllAllocations(tx *boltdd.Tx) ([]*structs.Allocation, map[string]error) { 176 allocs := []*structs.Allocation{} 177 errs := map[string]error{} 178 179 allocationsBkt := tx.Bucket(allocationsBucketName) 180 if allocationsBkt == nil { 181 // No allocs 182 return allocs, errs 183 } 184 185 // Create a cursor for iteration. 186 c := allocationsBkt.BoltBucket().Cursor() 187 188 // Iterate over all the allocation buckets 189 for k, _ := c.First(); k != nil; k, _ = c.Next() { 190 allocID := string(k) 191 allocBkt := allocationsBkt.Bucket(k) 192 if allocBkt == nil { 193 errs[allocID] = fmt.Errorf("missing alloc bucket") 194 continue 195 } 196 197 var ae allocEntry 198 if err := allocBkt.Get(allocKey, &ae); err != nil { 199 errs[allocID] = fmt.Errorf("failed to decode alloc: %v", err) 200 continue 201 } 202 203 allocs = append(allocs, ae.Alloc) 204 } 205 206 return allocs, errs 207 } 208 209 // PutAllocation stores an allocation or returns an error. 210 func (s *BoltStateDB) PutAllocation(alloc *structs.Allocation) error { 211 return s.db.Update(func(tx *boltdd.Tx) error { 212 // Retrieve the root allocations bucket 213 allocsBkt, err := tx.CreateBucketIfNotExists(allocationsBucketName) 214 if err != nil { 215 return err 216 } 217 218 // Retrieve the specific allocations bucket 219 key := []byte(alloc.ID) 220 allocBkt, err := allocsBkt.CreateBucketIfNotExists(key) 221 if err != nil { 222 return err 223 } 224 225 allocState := allocEntry{ 226 Alloc: alloc, 227 } 228 return allocBkt.Put(allocKey, &allocState) 229 }) 230 } 231 232 // deployStatusEntry wraps values for DeploymentStatus keys. 233 type deployStatusEntry struct { 234 DeploymentStatus *structs.AllocDeploymentStatus 235 } 236 237 // PutDeploymentStatus stores an allocation's DeploymentStatus or returns an 238 // error. 239 func (s *BoltStateDB) PutDeploymentStatus(allocID string, ds *structs.AllocDeploymentStatus) error { 240 return s.db.Update(func(tx *boltdd.Tx) error { 241 return putDeploymentStatusImpl(tx, allocID, ds) 242 }) 243 } 244 245 func putDeploymentStatusImpl(tx *boltdd.Tx, allocID string, ds *structs.AllocDeploymentStatus) error { 246 allocBkt, err := getAllocationBucket(tx, allocID) 247 if err != nil { 248 return err 249 } 250 251 entry := deployStatusEntry{ 252 DeploymentStatus: ds, 253 } 254 return allocBkt.Put(allocDeployStatusKey, &entry) 255 } 256 257 // GetDeploymentStatus retrieves an allocation's DeploymentStatus or returns an 258 // error. 259 func (s *BoltStateDB) GetDeploymentStatus(allocID string) (*structs.AllocDeploymentStatus, error) { 260 var entry deployStatusEntry 261 262 err := s.db.View(func(tx *boltdd.Tx) error { 263 allAllocsBkt := tx.Bucket(allocationsBucketName) 264 if allAllocsBkt == nil { 265 // No state, return 266 return nil 267 } 268 269 allocBkt := allAllocsBkt.Bucket([]byte(allocID)) 270 if allocBkt == nil { 271 // No state for alloc, return 272 return nil 273 } 274 275 return allocBkt.Get(allocDeployStatusKey, &entry) 276 }) 277 278 // It's valid for this field to be nil/missing 279 if boltdd.IsErrNotFound(err) { 280 return nil, nil 281 } 282 283 if err != nil { 284 return nil, err 285 } 286 287 return entry.DeploymentStatus, nil 288 } 289 290 // GetTaskRunnerState returns the LocalState and TaskState for a 291 // TaskRunner. LocalState or TaskState will be nil if they do not exist. 292 // 293 // If an error is encountered both LocalState and TaskState will be nil. 294 func (s *BoltStateDB) GetTaskRunnerState(allocID, taskName string) (*trstate.LocalState, *structs.TaskState, error) { 295 var ls *trstate.LocalState 296 var ts *structs.TaskState 297 298 err := s.db.View(func(tx *boltdd.Tx) error { 299 allAllocsBkt := tx.Bucket(allocationsBucketName) 300 if allAllocsBkt == nil { 301 // No state, return 302 return nil 303 } 304 305 allocBkt := allAllocsBkt.Bucket([]byte(allocID)) 306 if allocBkt == nil { 307 // No state for alloc, return 308 return nil 309 } 310 311 taskBkt := allocBkt.Bucket(taskBucketName(taskName)) 312 if taskBkt == nil { 313 // No state for task, return 314 return nil 315 } 316 317 // Restore Local State if it exists 318 ls = &trstate.LocalState{} 319 if err := taskBkt.Get(taskLocalStateKey, ls); err != nil { 320 if !boltdd.IsErrNotFound(err) { 321 return fmt.Errorf("failed to read local task runner state: %v", err) 322 } 323 324 // Key not found, reset ls to nil 325 ls = nil 326 } 327 328 // Restore Task State if it exists 329 ts = &structs.TaskState{} 330 if err := taskBkt.Get(taskStateKey, ts); err != nil { 331 if !boltdd.IsErrNotFound(err) { 332 return fmt.Errorf("failed to read task state: %v", err) 333 } 334 335 // Key not found, reset ts to nil 336 ts = nil 337 } 338 339 return nil 340 }) 341 342 if err != nil { 343 return nil, nil, err 344 } 345 346 return ls, ts, nil 347 } 348 349 // PutTaskRunnerLocalState stores TaskRunner's LocalState or returns an error. 350 func (s *BoltStateDB) PutTaskRunnerLocalState(allocID, taskName string, val *trstate.LocalState) error { 351 return s.db.Update(func(tx *boltdd.Tx) error { 352 return putTaskRunnerLocalStateImpl(tx, allocID, taskName, val) 353 }) 354 } 355 356 // putTaskRunnerLocalStateImpl stores TaskRunner's LocalState in an ongoing 357 // transaction or returns an error. 358 func putTaskRunnerLocalStateImpl(tx *boltdd.Tx, allocID, taskName string, val *trstate.LocalState) error { 359 taskBkt, err := getTaskBucket(tx, allocID, taskName) 360 if err != nil { 361 return fmt.Errorf("failed to retrieve allocation bucket: %v", err) 362 } 363 364 if err := taskBkt.Put(taskLocalStateKey, val); err != nil { 365 return fmt.Errorf("failed to write task_runner state: %v", err) 366 } 367 368 return nil 369 } 370 371 // PutTaskState stores a task's state or returns an error. 372 func (s *BoltStateDB) PutTaskState(allocID, taskName string, state *structs.TaskState) error { 373 return s.db.Update(func(tx *boltdd.Tx) error { 374 return putTaskStateImpl(tx, allocID, taskName, state) 375 }) 376 } 377 378 // putTaskStateImpl stores a task's state in an ongoing transaction or returns 379 // an error. 380 func putTaskStateImpl(tx *boltdd.Tx, allocID, taskName string, state *structs.TaskState) error { 381 taskBkt, err := getTaskBucket(tx, allocID, taskName) 382 if err != nil { 383 return fmt.Errorf("failed to retrieve allocation bucket: %v", err) 384 } 385 386 return taskBkt.Put(taskStateKey, state) 387 } 388 389 // DeleteTaskBucket is used to delete a task bucket if it exists. 390 func (s *BoltStateDB) DeleteTaskBucket(allocID, taskName string) error { 391 return s.db.Update(func(tx *boltdd.Tx) error { 392 // Retrieve the root allocations bucket 393 allocations := tx.Bucket(allocationsBucketName) 394 if allocations == nil { 395 return nil 396 } 397 398 // Retrieve the specific allocations bucket 399 alloc := allocations.Bucket([]byte(allocID)) 400 if alloc == nil { 401 return nil 402 } 403 404 // Check if the bucket exists 405 key := taskBucketName(taskName) 406 return alloc.DeleteBucket(key) 407 }) 408 } 409 410 // DeleteAllocationBucket is used to delete an allocation bucket if it exists. 411 func (s *BoltStateDB) DeleteAllocationBucket(allocID string) error { 412 return s.db.Update(func(tx *boltdd.Tx) error { 413 // Retrieve the root allocations bucket 414 allocations := tx.Bucket(allocationsBucketName) 415 if allocations == nil { 416 return nil 417 } 418 419 key := []byte(allocID) 420 return allocations.DeleteBucket(key) 421 }) 422 } 423 424 // Close releases all database resources and unlocks the database file on disk. 425 // All transactions must be closed before closing the database. 426 func (s *BoltStateDB) Close() error { 427 return s.db.Close() 428 } 429 430 // getAllocationBucket returns the bucket used to persist state about a 431 // particular allocation. If the root allocation bucket or the specific 432 // allocation bucket doesn't exist, it will be created as long as the 433 // transaction is writable. 434 func getAllocationBucket(tx *boltdd.Tx, allocID string) (*boltdd.Bucket, error) { 435 var err error 436 w := tx.Writable() 437 438 // Retrieve the root allocations bucket 439 allocations := tx.Bucket(allocationsBucketName) 440 if allocations == nil { 441 if !w { 442 return nil, fmt.Errorf("Allocations bucket doesn't exist and transaction is not writable") 443 } 444 445 allocations, err = tx.CreateBucketIfNotExists(allocationsBucketName) 446 if err != nil { 447 return nil, err 448 } 449 } 450 451 // Retrieve the specific allocations bucket 452 key := []byte(allocID) 453 alloc := allocations.Bucket(key) 454 if alloc == nil { 455 if !w { 456 return nil, fmt.Errorf("Allocation bucket doesn't exist and transaction is not writable") 457 } 458 459 alloc, err = allocations.CreateBucket(key) 460 if err != nil { 461 return nil, err 462 } 463 } 464 465 return alloc, nil 466 } 467 468 // getTaskBucket returns the bucket used to persist state about a 469 // particular task. If the root allocation bucket, the specific 470 // allocation or task bucket doesn't exist, they will be created as long as the 471 // transaction is writable. 472 func getTaskBucket(tx *boltdd.Tx, allocID, taskName string) (*boltdd.Bucket, error) { 473 alloc, err := getAllocationBucket(tx, allocID) 474 if err != nil { 475 return nil, err 476 } 477 478 // Retrieve the specific task bucket 479 w := tx.Writable() 480 key := taskBucketName(taskName) 481 task := alloc.Bucket(key) 482 if task == nil { 483 if !w { 484 return nil, fmt.Errorf("Task bucket doesn't exist and transaction is not writable") 485 } 486 487 task, err = alloc.CreateBucket(key) 488 if err != nil { 489 return nil, err 490 } 491 } 492 493 return task, nil 494 } 495 496 // PutDevicePluginState stores the device manager's plugin state or returns an 497 // error. 498 func (s *BoltStateDB) PutDevicePluginState(ps *dmstate.PluginState) error { 499 return s.db.Update(func(tx *boltdd.Tx) error { 500 // Retrieve the root device manager bucket 501 devBkt, err := tx.CreateBucketIfNotExists(devManagerBucket) 502 if err != nil { 503 return err 504 } 505 506 return devBkt.Put(managerPluginStateKey, ps) 507 }) 508 } 509 510 // GetDevicePluginState stores the device manager's plugin state or returns an 511 // error. 512 func (s *BoltStateDB) GetDevicePluginState() (*dmstate.PluginState, error) { 513 var ps *dmstate.PluginState 514 515 err := s.db.View(func(tx *boltdd.Tx) error { 516 devBkt := tx.Bucket(devManagerBucket) 517 if devBkt == nil { 518 // No state, return 519 return nil 520 } 521 522 // Restore Plugin State if it exists 523 ps = &dmstate.PluginState{} 524 if err := devBkt.Get(managerPluginStateKey, ps); err != nil { 525 if !boltdd.IsErrNotFound(err) { 526 return fmt.Errorf("failed to read device manager plugin state: %v", err) 527 } 528 529 // Key not found, reset ps to nil 530 ps = nil 531 } 532 533 return nil 534 }) 535 536 if err != nil { 537 return nil, err 538 } 539 540 return ps, nil 541 } 542 543 // PutDriverPluginState stores the driver manager's plugin state or returns an 544 // error. 545 func (s *BoltStateDB) PutDriverPluginState(ps *driverstate.PluginState) error { 546 return s.db.Update(func(tx *boltdd.Tx) error { 547 // Retrieve the root driver manager bucket 548 driverBkt, err := tx.CreateBucketIfNotExists(driverManagerBucket) 549 if err != nil { 550 return err 551 } 552 553 return driverBkt.Put(managerPluginStateKey, ps) 554 }) 555 } 556 557 // GetDriverPluginState stores the driver manager's plugin state or returns an 558 // error. 559 func (s *BoltStateDB) GetDriverPluginState() (*driverstate.PluginState, error) { 560 var ps *driverstate.PluginState 561 562 err := s.db.View(func(tx *boltdd.Tx) error { 563 driverBkt := tx.Bucket(driverManagerBucket) 564 if driverBkt == nil { 565 // No state, return 566 return nil 567 } 568 569 // Restore Plugin State if it exists 570 ps = &driverstate.PluginState{} 571 if err := driverBkt.Get(managerPluginStateKey, ps); err != nil { 572 if !boltdd.IsErrNotFound(err) { 573 return fmt.Errorf("failed to read driver manager plugin state: %v", err) 574 } 575 576 // Key not found, reset ps to nil 577 ps = nil 578 } 579 580 return nil 581 }) 582 583 if err != nil { 584 return nil, err 585 } 586 587 return ps, nil 588 } 589 590 // init initializes metadata entries in a newly created state database. 591 func (s *BoltStateDB) init() error { 592 return s.db.Update(func(tx *boltdd.Tx) error { 593 return addMeta(tx.BoltTx()) 594 }) 595 } 596 597 // Upgrade bolt state db from 0.8 schema to 0.9 schema. Noop if already using 598 // 0.9 schema. Creates a backup before upgrading. 599 func (s *BoltStateDB) Upgrade() error { 600 // Check to see if the underlying DB needs upgrading. 601 upgrade, err := NeedsUpgrade(s.db.BoltDB()) 602 if err != nil { 603 return err 604 } 605 if !upgrade { 606 // No upgrade needed! 607 return nil 608 } 609 610 // Upgraded needed. Backup the boltdb first. 611 backupFileName := filepath.Join(s.stateDir, "state.db.backup") 612 if err := backupDB(s.db.BoltDB(), backupFileName); err != nil { 613 return fmt.Errorf("error backing up state db: %v", err) 614 } 615 616 // Perform the upgrade 617 if err := s.db.Update(func(tx *boltdd.Tx) error { 618 if err := UpgradeAllocs(s.logger, tx); err != nil { 619 return err 620 } 621 622 // Add standard metadata 623 if err := addMeta(tx.BoltTx()); err != nil { 624 return err 625 } 626 627 // Write the time the upgrade was done 628 bkt, err := tx.CreateBucketIfNotExists(metaBucketName) 629 if err != nil { 630 return err 631 } 632 return bkt.Put(metaUpgradedKey, time.Now().Format(time.RFC3339)) 633 }); err != nil { 634 return err 635 } 636 637 s.logger.Info("successfully upgraded state") 638 return nil 639 } 640 641 // DB allows access to the underlying BoltDB for testing purposes. 642 func (s *BoltStateDB) DB() *boltdd.DB { 643 return s.db 644 }